diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +17 -12
- diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
- diffusers-0.34.0.dist-info/RECORD +639 -0
- diffusers-0.33.0.dist-info/RECORD +0 -608
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -24,7 +24,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel
|
|
24
24
|
from ...schedulers import KarrasDiffusionSchedulers
|
25
25
|
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
26
26
|
from ...utils.torch_utils import randn_tensor
|
27
|
-
from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
|
27
|
+
from ..pipeline_utils import AudioPipelineOutput, DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
28
28
|
|
29
29
|
|
30
30
|
if is_torch_xla_available():
|
@@ -57,7 +57,7 @@ EXAMPLE_DOC_STRING = """
|
|
57
57
|
"""
|
58
58
|
|
59
59
|
|
60
|
-
class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
|
60
|
+
class AudioLDMPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
|
61
61
|
r"""
|
62
62
|
Pipeline for text-to-audio generation using AudioLDM.
|
63
63
|
|
@@ -81,6 +81,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
81
81
|
Vocoder of class `SpeechT5HifiGan`.
|
82
82
|
"""
|
83
83
|
|
84
|
+
_last_supported_version = "0.33.1"
|
84
85
|
model_cpu_offload_seq = "text_encoder->unet->vae"
|
85
86
|
|
86
87
|
def __init__(
|
@@ -261,7 +262,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
261
262
|
def prepare_extra_step_kwargs(self, generator, eta):
|
262
263
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
263
264
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
264
|
-
# eta corresponds to η in DDIM paper: https://
|
265
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
265
266
|
# and should be between [0, 1]
|
266
267
|
|
267
268
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -397,8 +398,8 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
397
398
|
num_waveforms_per_prompt (`int`, *optional*, defaults to 1):
|
398
399
|
The number of waveforms to generate per prompt.
|
399
400
|
eta (`float`, *optional*, defaults to 0.0):
|
400
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
401
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
401
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
402
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
402
403
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
403
404
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
404
405
|
generation deterministic.
|
@@ -472,7 +473,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
472
473
|
|
473
474
|
device = self._execution_device
|
474
475
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
475
|
-
# of the Imagen paper: https://
|
476
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
476
477
|
# corresponds to doing no classifier free guidance.
|
477
478
|
do_classifier_free_guidance = guidance_scale > 1.0
|
478
479
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 CVSSP, ByteDance and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -40,7 +40,8 @@ from ...utils import (
|
|
40
40
|
logging,
|
41
41
|
replace_example_docstring,
|
42
42
|
)
|
43
|
-
from ...utils.
|
43
|
+
from ...utils.import_utils import is_transformers_version
|
44
|
+
from ...utils.torch_utils import empty_device_cache, randn_tensor
|
44
45
|
from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
|
45
46
|
from .modeling_audioldm2 import AudioLDM2ProjectionModel, AudioLDM2UNet2DConditionModel
|
46
47
|
|
@@ -266,9 +267,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
|
266
267
|
|
267
268
|
if self.device.type != "cpu":
|
268
269
|
self.to("cpu", silence_dtype_warnings=True)
|
269
|
-
|
270
|
-
if hasattr(device_mod, "empty_cache") and device_mod.is_available():
|
271
|
-
device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
|
270
|
+
empty_device_cache(device.type)
|
272
271
|
|
273
272
|
model_sequence = [
|
274
273
|
self.text_encoder.text_model,
|
@@ -312,8 +311,19 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
|
312
311
|
inputs_embeds (`torch.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
|
313
312
|
The sequence of generated hidden-states.
|
314
313
|
"""
|
314
|
+
cache_position_kwargs = {}
|
315
|
+
if is_transformers_version("<", "4.52.0.dev0"):
|
316
|
+
cache_position_kwargs["input_ids"] = inputs_embeds
|
317
|
+
cache_position_kwargs["model_kwargs"] = model_kwargs
|
318
|
+
else:
|
319
|
+
cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
|
320
|
+
cache_position_kwargs["device"] = (
|
321
|
+
self.language_model.device if getattr(self, "language_model", None) is not None else self.device
|
322
|
+
)
|
323
|
+
cache_position_kwargs["model_kwargs"] = model_kwargs
|
315
324
|
max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
|
316
|
-
model_kwargs = self.language_model._get_initial_cache_position(
|
325
|
+
model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
|
326
|
+
|
317
327
|
for _ in range(max_new_tokens):
|
318
328
|
# prepare model inputs
|
319
329
|
model_inputs = prepare_inputs_for_generation(inputs_embeds, **model_kwargs)
|
@@ -373,7 +383,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
|
373
383
|
*e.g.* prompt weighting. If not provided, negative_prompt_embeds will be computed from
|
374
384
|
`negative_prompt` input argument.
|
375
385
|
generated_prompt_embeds (`torch.Tensor`, *optional*):
|
376
|
-
Pre-generated text embeddings from the GPT2
|
386
|
+
Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs,
|
377
387
|
*e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input
|
378
388
|
argument.
|
379
389
|
negative_generated_prompt_embeds (`torch.Tensor`, *optional*):
|
@@ -394,7 +404,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
|
394
404
|
attention_mask (`torch.LongTensor`):
|
395
405
|
Attention mask to be applied to the `prompt_embeds`.
|
396
406
|
generated_prompt_embeds (`torch.Tensor`):
|
397
|
-
Text embeddings generated from the GPT2
|
407
|
+
Text embeddings generated from the GPT2 language model.
|
398
408
|
|
399
409
|
Example:
|
400
410
|
|
@@ -701,7 +711,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
|
701
711
|
def prepare_extra_step_kwargs(self, generator, eta):
|
702
712
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
703
713
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
704
|
-
# eta corresponds to η in DDIM paper: https://
|
714
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
705
715
|
# and should be between [0, 1]
|
706
716
|
|
707
717
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -888,8 +898,8 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
|
888
898
|
generated waveforms based on their cosine similarity with the text input in the joint text-audio
|
889
899
|
embedding space.
|
890
900
|
eta (`float`, *optional*, defaults to 0.0):
|
891
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
892
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
901
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
902
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
893
903
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
894
904
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
895
905
|
generation deterministic.
|
@@ -904,7 +914,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
|
904
914
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
905
915
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
906
916
|
generated_prompt_embeds (`torch.Tensor`, *optional*):
|
907
|
-
Pre-generated text embeddings from the GPT2
|
917
|
+
Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs,
|
908
918
|
*e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input
|
909
919
|
argument.
|
910
920
|
negative_generated_prompt_embeds (`torch.Tensor`, *optional*):
|
@@ -987,7 +997,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
|
987
997
|
|
988
998
|
device = self._execution_device
|
989
999
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
990
|
-
# of the Imagen paper: https://
|
1000
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
991
1001
|
# corresponds to doing no classifier free guidance.
|
992
1002
|
do_classifier_free_guidance = guidance_scale > 1.0
|
993
1003
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 AuraFlow Authors and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -12,17 +12,25 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
import inspect
|
15
|
-
from typing import Callable, Dict, List, Optional, Tuple, Union
|
15
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
16
16
|
|
17
17
|
import torch
|
18
18
|
from transformers import T5Tokenizer, UMT5EncoderModel
|
19
19
|
|
20
20
|
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
21
21
|
from ...image_processor import VaeImageProcessor
|
22
|
+
from ...loaders import AuraFlowLoraLoaderMixin
|
22
23
|
from ...models import AuraFlowTransformer2DModel, AutoencoderKL
|
23
24
|
from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
|
24
25
|
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
25
|
-
from ...utils import
|
26
|
+
from ...utils import (
|
27
|
+
USE_PEFT_BACKEND,
|
28
|
+
is_torch_xla_available,
|
29
|
+
logging,
|
30
|
+
replace_example_docstring,
|
31
|
+
scale_lora_layers,
|
32
|
+
unscale_lora_layers,
|
33
|
+
)
|
26
34
|
from ...utils.torch_utils import randn_tensor
|
27
35
|
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
28
36
|
|
@@ -112,7 +120,7 @@ def retrieve_timesteps(
|
|
112
120
|
return timesteps, num_inference_steps
|
113
121
|
|
114
122
|
|
115
|
-
class AuraFlowPipeline(DiffusionPipeline):
|
123
|
+
class AuraFlowPipeline(DiffusionPipeline, AuraFlowLoraLoaderMixin):
|
116
124
|
r"""
|
117
125
|
Args:
|
118
126
|
tokenizer (`T5TokenizerFast`):
|
@@ -233,6 +241,7 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
233
241
|
prompt_attention_mask: Optional[torch.Tensor] = None,
|
234
242
|
negative_prompt_attention_mask: Optional[torch.Tensor] = None,
|
235
243
|
max_sequence_length: int = 256,
|
244
|
+
lora_scale: Optional[float] = None,
|
236
245
|
):
|
237
246
|
r"""
|
238
247
|
Encodes the prompt into text encoder hidden states.
|
@@ -259,10 +268,20 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
259
268
|
negative_prompt_attention_mask (`torch.Tensor`, *optional*):
|
260
269
|
Pre-generated attention mask for negative text embeddings.
|
261
270
|
max_sequence_length (`int`, defaults to 256): Maximum sequence length to use for the prompt.
|
271
|
+
lora_scale (`float`, *optional*):
|
272
|
+
A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
|
262
273
|
"""
|
274
|
+
# set lora scale so that monkey patched LoRA
|
275
|
+
# function of text encoder can correctly access it
|
276
|
+
if lora_scale is not None and isinstance(self, AuraFlowLoraLoaderMixin):
|
277
|
+
self._lora_scale = lora_scale
|
278
|
+
|
279
|
+
# dynamically adjust the LoRA scale
|
280
|
+
if self.text_encoder is not None and USE_PEFT_BACKEND:
|
281
|
+
scale_lora_layers(self.text_encoder, lora_scale)
|
282
|
+
|
263
283
|
if device is None:
|
264
284
|
device = self._execution_device
|
265
|
-
|
266
285
|
if prompt is not None and isinstance(prompt, str):
|
267
286
|
batch_size = 1
|
268
287
|
elif prompt is not None and isinstance(prompt, list):
|
@@ -346,6 +365,11 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
346
365
|
negative_prompt_embeds = None
|
347
366
|
negative_prompt_attention_mask = None
|
348
367
|
|
368
|
+
if self.text_encoder is not None:
|
369
|
+
if isinstance(self, AuraFlowLoraLoaderMixin) and USE_PEFT_BACKEND:
|
370
|
+
# Retrieve the original scale by scaling back the LoRA layers
|
371
|
+
unscale_lora_layers(self.text_encoder, lora_scale)
|
372
|
+
|
349
373
|
return prompt_embeds, prompt_attention_mask, negative_prompt_embeds, negative_prompt_attention_mask
|
350
374
|
|
351
375
|
# Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.prepare_latents
|
@@ -403,6 +427,10 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
403
427
|
def guidance_scale(self):
|
404
428
|
return self._guidance_scale
|
405
429
|
|
430
|
+
@property
|
431
|
+
def attention_kwargs(self):
|
432
|
+
return self._attention_kwargs
|
433
|
+
|
406
434
|
@property
|
407
435
|
def num_timesteps(self):
|
408
436
|
return self._num_timesteps
|
@@ -428,6 +456,7 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
428
456
|
max_sequence_length: int = 256,
|
429
457
|
output_type: Optional[str] = "pil",
|
430
458
|
return_dict: bool = True,
|
459
|
+
attention_kwargs: Optional[Dict[str, Any]] = None,
|
431
460
|
callback_on_step_end: Optional[
|
432
461
|
Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
|
433
462
|
] = None,
|
@@ -455,11 +484,11 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
455
484
|
Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
|
456
485
|
`num_inference_steps` and `timesteps` must be `None`.
|
457
486
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
458
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
459
|
-
`guidance_scale` is defined as `w` of equation 2.
|
460
|
-
Paper](https://
|
461
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
462
|
-
usually at the expense of lower image quality.
|
487
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
488
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
489
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
490
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
491
|
+
the text `prompt`, usually at the expense of lower image quality.
|
463
492
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
464
493
|
The number of images to generate per prompt.
|
465
494
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -486,6 +515,10 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
486
515
|
return_dict (`bool`, *optional*, defaults to `True`):
|
487
516
|
Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
|
488
517
|
of a plain tuple.
|
518
|
+
attention_kwargs (`dict`, *optional*):
|
519
|
+
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
520
|
+
`self.processor` in
|
521
|
+
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
489
522
|
callback_on_step_end (`Callable`, *optional*):
|
490
523
|
A function that is called at the end of each denoising step during inference. The function is called
|
491
524
|
with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
|
@@ -520,6 +553,7 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
520
553
|
)
|
521
554
|
|
522
555
|
self._guidance_scale = guidance_scale
|
556
|
+
self._attention_kwargs = attention_kwargs
|
523
557
|
|
524
558
|
# 2. Determine batch size.
|
525
559
|
if prompt is not None and isinstance(prompt, str):
|
@@ -530,9 +564,10 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
530
564
|
batch_size = prompt_embeds.shape[0]
|
531
565
|
|
532
566
|
device = self._execution_device
|
567
|
+
lora_scale = self.attention_kwargs.get("scale", None) if self.attention_kwargs is not None else None
|
533
568
|
|
534
569
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
535
|
-
# of the Imagen paper: https://
|
570
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
536
571
|
# corresponds to doing no classifier free guidance.
|
537
572
|
do_classifier_free_guidance = guidance_scale > 1.0
|
538
573
|
|
@@ -553,6 +588,7 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
553
588
|
prompt_attention_mask=prompt_attention_mask,
|
554
589
|
negative_prompt_attention_mask=negative_prompt_attention_mask,
|
555
590
|
max_sequence_length=max_sequence_length,
|
591
|
+
lora_scale=lora_scale,
|
556
592
|
)
|
557
593
|
if do_classifier_free_guidance:
|
558
594
|
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
|
@@ -594,6 +630,7 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
594
630
|
encoder_hidden_states=prompt_embeds,
|
595
631
|
timestep=timestep,
|
596
632
|
return_dict=False,
|
633
|
+
attention_kwargs=self.attention_kwargs,
|
597
634
|
)[0]
|
598
635
|
|
599
636
|
# perform guidance
|
@@ -21,6 +21,7 @@ from ..configuration_utils import ConfigMixin
|
|
21
21
|
from ..models.controlnets import ControlNetUnionModel
|
22
22
|
from ..utils import is_sentencepiece_available
|
23
23
|
from .aura_flow import AuraFlowPipeline
|
24
|
+
from .chroma import ChromaPipeline
|
24
25
|
from .cogview3 import CogView3PlusPipeline
|
25
26
|
from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
|
26
27
|
from .controlnet import (
|
@@ -143,6 +144,7 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
|
|
143
144
|
("flux-controlnet", FluxControlNetPipeline),
|
144
145
|
("lumina", LuminaPipeline),
|
145
146
|
("lumina2", Lumina2Pipeline),
|
147
|
+
("chroma", ChromaPipeline),
|
146
148
|
("cogview3", CogView3PlusPipeline),
|
147
149
|
("cogview4", CogView4Pipeline),
|
148
150
|
("cogview4-control", CogView4ControlPipeline),
|
@@ -322,9 +324,8 @@ class AutoPipelineForText2Image(ConfigMixin):
|
|
322
324
|
- A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights
|
323
325
|
saved using
|
324
326
|
[`~DiffusionPipeline.save_pretrained`].
|
325
|
-
torch_dtype (`
|
326
|
-
Override the default `torch.dtype` and load the model with another dtype.
|
327
|
-
dtype is automatically derived from the model's weights.
|
327
|
+
torch_dtype (`torch.dtype`, *optional*):
|
328
|
+
Override the default `torch.dtype` and load the model with another dtype.
|
328
329
|
force_download (`bool`, *optional*, defaults to `False`):
|
329
330
|
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
|
330
331
|
cached versions if they exist.
|
@@ -619,8 +620,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
|
|
619
620
|
saved using
|
620
621
|
[`~DiffusionPipeline.save_pretrained`].
|
621
622
|
torch_dtype (`str` or `torch.dtype`, *optional*):
|
622
|
-
Override the default `torch.dtype` and load the model with another dtype.
|
623
|
-
dtype is automatically derived from the model's weights.
|
623
|
+
Override the default `torch.dtype` and load the model with another dtype.
|
624
624
|
force_download (`bool`, *optional*, defaults to `False`):
|
625
625
|
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
|
626
626
|
cached versions if they exist.
|
@@ -930,8 +930,7 @@ class AutoPipelineForInpainting(ConfigMixin):
|
|
930
930
|
saved using
|
931
931
|
[`~DiffusionPipeline.save_pretrained`].
|
932
932
|
torch_dtype (`str` or `torch.dtype`, *optional*):
|
933
|
-
Override the default `torch.dtype` and load the model with another dtype.
|
934
|
-
dtype is automatically derived from the model's weights.
|
933
|
+
Override the default `torch.dtype` and load the model with another dtype.
|
935
934
|
force_download (`bool`, *optional*, defaults to `False`):
|
936
935
|
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
|
937
936
|
cached versions if they exist.
|
@@ -1,5 +1,5 @@
|
|
1
|
-
# Copyright
|
2
|
-
# Copyright
|
1
|
+
# Copyright 2025 Salesforce.com, inc.
|
2
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
5
|
# you may not use this file except in compliance with the License.
|
@@ -1,5 +1,5 @@
|
|
1
|
-
# Copyright
|
2
|
-
# Copyright
|
1
|
+
# Copyright 2025 Salesforce.com, inc.
|
2
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
5
5
|
# You may obtain a copy of the License at
|
@@ -25,7 +25,7 @@ from ...utils import (
|
|
25
25
|
replace_example_docstring,
|
26
26
|
)
|
27
27
|
from ...utils.torch_utils import randn_tensor
|
28
|
-
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
28
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
29
29
|
from .blip_image_processing import BlipImageProcessor
|
30
30
|
from .modeling_blip2 import Blip2QFormerModel
|
31
31
|
from .modeling_ctx_clip import ContextCLIPTextModel
|
@@ -81,7 +81,7 @@ EXAMPLE_DOC_STRING = """
|
|
81
81
|
"""
|
82
82
|
|
83
83
|
|
84
|
-
class BlipDiffusionPipeline(DiffusionPipeline):
|
84
|
+
class BlipDiffusionPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
85
85
|
"""
|
86
86
|
Pipeline for Zero-Shot Subject Driven Generation using Blip Diffusion.
|
87
87
|
|
@@ -107,6 +107,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
|
107
107
|
Position of the context token in the text encoder.
|
108
108
|
"""
|
109
109
|
|
110
|
+
_last_supported_version = "0.33.1"
|
110
111
|
model_cpu_offload_seq = "qformer->text_encoder->unet->vae"
|
111
112
|
|
112
113
|
def __init__(
|
@@ -138,7 +139,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
|
138
139
|
def get_query_embeddings(self, input_image, src_subject):
|
139
140
|
return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False)
|
140
141
|
|
141
|
-
# from the original Blip Diffusion code,
|
142
|
+
# from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it
|
142
143
|
def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20):
|
143
144
|
rv = []
|
144
145
|
for prompt, tgt_subject in zip(prompts, tgt_subjects):
|
@@ -229,11 +230,11 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
|
229
230
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
230
231
|
tensor will be generated by random sampling.
|
231
232
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
232
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
233
|
-
`guidance_scale` is defined as `w` of equation 2.
|
234
|
-
Paper](https://
|
235
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
236
|
-
usually at the expense of lower image quality.
|
233
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
234
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
235
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
236
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
237
|
+
the text `prompt`, usually at the expense of lower image quality.
|
237
238
|
height (`int`, *optional*, defaults to 512):
|
238
239
|
The height of the generated image.
|
239
240
|
width (`int`, *optional*, defaults to 512):
|
@@ -0,0 +1,49 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
from ...utils import (
|
4
|
+
DIFFUSERS_SLOW_IMPORT,
|
5
|
+
OptionalDependencyNotAvailable,
|
6
|
+
_LazyModule,
|
7
|
+
get_objects_from_module,
|
8
|
+
is_torch_available,
|
9
|
+
is_transformers_available,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
_dummy_objects = {}
|
14
|
+
_additional_imports = {}
|
15
|
+
_import_structure = {"pipeline_output": ["ChromaPipelineOutput"]}
|
16
|
+
|
17
|
+
try:
|
18
|
+
if not (is_transformers_available() and is_torch_available()):
|
19
|
+
raise OptionalDependencyNotAvailable()
|
20
|
+
except OptionalDependencyNotAvailable:
|
21
|
+
from ...utils import dummy_torch_and_transformers_objects # noqa F403
|
22
|
+
|
23
|
+
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
24
|
+
else:
|
25
|
+
_import_structure["pipeline_chroma"] = ["ChromaPipeline"]
|
26
|
+
_import_structure["pipeline_chroma_img2img"] = ["ChromaImg2ImgPipeline"]
|
27
|
+
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
28
|
+
try:
|
29
|
+
if not (is_transformers_available() and is_torch_available()):
|
30
|
+
raise OptionalDependencyNotAvailable()
|
31
|
+
except OptionalDependencyNotAvailable:
|
32
|
+
from ...utils.dummy_torch_and_transformers_objects import * # noqa F403
|
33
|
+
else:
|
34
|
+
from .pipeline_chroma import ChromaPipeline
|
35
|
+
from .pipeline_chroma_img2img import ChromaImg2ImgPipeline
|
36
|
+
else:
|
37
|
+
import sys
|
38
|
+
|
39
|
+
sys.modules[__name__] = _LazyModule(
|
40
|
+
__name__,
|
41
|
+
globals()["__file__"],
|
42
|
+
_import_structure,
|
43
|
+
module_spec=__spec__,
|
44
|
+
)
|
45
|
+
|
46
|
+
for name, value in _dummy_objects.items():
|
47
|
+
setattr(sys.modules[__name__], name, value)
|
48
|
+
for name, value in _additional_imports.items():
|
49
|
+
setattr(sys.modules[__name__], name, value)
|