diffusers 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +13 -10
- diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
- diffusers-0.34.0.dist-info/RECORD +639 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -33,7 +33,7 @@ from ...utils import (
|
|
33
33
|
)
|
34
34
|
from ...utils.torch_utils import randn_tensor
|
35
35
|
from ...video_processor import VideoProcessor
|
36
|
-
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
36
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
37
37
|
from . import TextToVideoSDPipelineOutput
|
38
38
|
|
39
39
|
|
@@ -68,8 +68,13 @@ EXAMPLE_DOC_STRING = """
|
|
68
68
|
|
69
69
|
|
70
70
|
class TextToVideoSDPipeline(
|
71
|
-
|
71
|
+
DeprecatedPipelineMixin,
|
72
|
+
DiffusionPipeline,
|
73
|
+
StableDiffusionMixin,
|
74
|
+
TextualInversionLoaderMixin,
|
75
|
+
StableDiffusionLoraLoaderMixin,
|
72
76
|
):
|
77
|
+
_last_supported_version = "0.33.1"
|
73
78
|
r"""
|
74
79
|
Pipeline for text-to-video generation.
|
75
80
|
|
@@ -349,7 +354,7 @@ class TextToVideoSDPipeline(
|
|
349
354
|
def prepare_extra_step_kwargs(self, generator, eta):
|
350
355
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
351
356
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
352
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
357
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
353
358
|
# and should be between [0, 1]
|
354
359
|
|
355
360
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -489,8 +494,8 @@ class TextToVideoSDPipeline(
|
|
489
494
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
490
495
|
The number of images to generate per prompt.
|
491
496
|
eta (`float`, *optional*, defaults to 0.0):
|
492
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
493
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
497
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
498
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
494
499
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
495
500
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
496
501
|
generation deterministic.
|
@@ -550,7 +555,7 @@ class TextToVideoSDPipeline(
|
|
550
555
|
|
551
556
|
device = self._execution_device
|
552
557
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
553
|
-
# of the Imagen paper: https://
|
558
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
554
559
|
# corresponds to doing no classifier free guidance.
|
555
560
|
do_classifier_free_guidance = guidance_scale > 1.0
|
556
561
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -34,7 +34,7 @@ from ...utils import (
|
|
34
34
|
)
|
35
35
|
from ...utils.torch_utils import randn_tensor
|
36
36
|
from ...video_processor import VideoProcessor
|
37
|
-
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
37
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
38
38
|
from . import TextToVideoSDPipelineOutput
|
39
39
|
|
40
40
|
|
@@ -103,8 +103,13 @@ def retrieve_latents(
|
|
103
103
|
|
104
104
|
|
105
105
|
class VideoToVideoSDPipeline(
|
106
|
-
|
106
|
+
DeprecatedPipelineMixin,
|
107
|
+
DiffusionPipeline,
|
108
|
+
StableDiffusionMixin,
|
109
|
+
TextualInversionLoaderMixin,
|
110
|
+
StableDiffusionLoraLoaderMixin,
|
107
111
|
):
|
112
|
+
_last_supported_version = "0.33.1"
|
108
113
|
r"""
|
109
114
|
Pipeline for text-guided video-to-video generation.
|
110
115
|
|
@@ -385,7 +390,7 @@ class VideoToVideoSDPipeline(
|
|
385
390
|
def prepare_extra_step_kwargs(self, generator, eta):
|
386
391
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
387
392
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
388
|
-
# eta corresponds to η in DDIM paper: https://
|
393
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
389
394
|
# and should be between [0, 1]
|
390
395
|
|
391
396
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -553,8 +558,8 @@ class VideoToVideoSDPipeline(
|
|
553
558
|
The prompt or prompts to guide what to not include in video generation. If not defined, you need to
|
554
559
|
pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale <= 1`).
|
555
560
|
eta (`float`, *optional*, defaults to 0.0):
|
556
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
557
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
561
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
562
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
558
563
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
559
564
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
560
565
|
generation deterministic.
|
@@ -609,7 +614,7 @@ class VideoToVideoSDPipeline(
|
|
609
614
|
|
610
615
|
device = self._execution_device
|
611
616
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
612
|
-
# of the Imagen paper: https://
|
617
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
613
618
|
# corresponds to doing no classifier free guidance.
|
614
619
|
do_classifier_free_guidance = guidance_scale > 1.0
|
615
620
|
|
@@ -23,8 +23,8 @@ from ...utils import (
|
|
23
23
|
scale_lora_layers,
|
24
24
|
unscale_lora_layers,
|
25
25
|
)
|
26
|
-
from ...utils.torch_utils import randn_tensor
|
27
|
-
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
26
|
+
from ...utils.torch_utils import empty_device_cache, randn_tensor
|
27
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
28
28
|
from ..stable_diffusion import StableDiffusionSafetyChecker
|
29
29
|
|
30
30
|
|
@@ -296,12 +296,14 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s
|
|
296
296
|
|
297
297
|
|
298
298
|
class TextToVideoZeroPipeline(
|
299
|
+
DeprecatedPipelineMixin,
|
299
300
|
DiffusionPipeline,
|
300
301
|
StableDiffusionMixin,
|
301
302
|
TextualInversionLoaderMixin,
|
302
303
|
StableDiffusionLoraLoaderMixin,
|
303
304
|
FromSingleFileMixin,
|
304
305
|
):
|
306
|
+
_last_supported_version = "0.33.1"
|
305
307
|
r"""
|
306
308
|
Pipeline for zero-shot text-to-video generation using Stable Diffusion.
|
307
309
|
|
@@ -588,8 +590,8 @@ class TextToVideoZeroPipeline(
|
|
588
590
|
num_videos_per_prompt (`int`, *optional*, defaults to 1):
|
589
591
|
The number of videos to generate per prompt.
|
590
592
|
eta (`float`, *optional*, defaults to 0.0):
|
591
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
592
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
593
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
594
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
593
595
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
594
596
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
595
597
|
generation deterministic.
|
@@ -610,17 +612,17 @@ class TextToVideoZeroPipeline(
|
|
610
612
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
611
613
|
every step.
|
612
614
|
motion_field_strength_x (`float`, *optional*, defaults to 12):
|
613
|
-
Strength of motion in generated video along x-axis. See the
|
614
|
-
Sect. 3.3.1.
|
615
|
+
Strength of motion in generated video along x-axis. See the
|
616
|
+
[paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
|
615
617
|
motion_field_strength_y (`float`, *optional*, defaults to 12):
|
616
|
-
Strength of motion in generated video along y-axis. See the
|
617
|
-
Sect. 3.3.1.
|
618
|
+
Strength of motion in generated video along y-axis. See the
|
619
|
+
[paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
|
618
620
|
t0 (`int`, *optional*, defaults to 44):
|
619
621
|
Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the
|
620
|
-
[paper](https://
|
622
|
+
[paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
|
621
623
|
t1 (`int`, *optional*, defaults to 47):
|
622
624
|
Timestep t1. Should be in the range [t0 + 1, num_inference_steps - 1]. See the
|
623
|
-
[paper](https://
|
625
|
+
[paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
|
624
626
|
frame_ids (`List[int]`, *optional*):
|
625
627
|
Indexes of the frames that are being generated. This is used when generating longer videos
|
626
628
|
chunk-by-chunk.
|
@@ -663,7 +665,7 @@ class TextToVideoZeroPipeline(
|
|
663
665
|
batch_size = 1 if isinstance(prompt, str) else len(prompt)
|
664
666
|
device = self._execution_device
|
665
667
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
666
|
-
# of the Imagen paper: https://
|
668
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
667
669
|
# corresponds to doing no classifier free guidance.
|
668
670
|
do_classifier_free_guidance = guidance_scale > 1.0
|
669
671
|
|
@@ -758,7 +760,7 @@ class TextToVideoZeroPipeline(
|
|
758
760
|
# manually for max memory savings
|
759
761
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
760
762
|
self.unet.to("cpu")
|
761
|
-
|
763
|
+
empty_device_cache()
|
762
764
|
|
763
765
|
if output_type == "latent":
|
764
766
|
image = latents
|
@@ -797,7 +799,7 @@ class TextToVideoZeroPipeline(
|
|
797
799
|
def prepare_extra_step_kwargs(self, generator, eta):
|
798
800
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
799
801
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
800
|
-
# eta corresponds to η in DDIM paper: https://
|
802
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
801
803
|
# and should be between [0, 1]
|
802
804
|
|
803
805
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -35,7 +35,7 @@ from ...utils import (
|
|
35
35
|
unscale_lora_layers,
|
36
36
|
)
|
37
37
|
from ...utils.torch_utils import randn_tensor
|
38
|
-
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
38
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
39
39
|
|
40
40
|
|
41
41
|
if is_invisible_watermark_available():
|
@@ -323,7 +323,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
323
323
|
r"""
|
324
324
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
325
325
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
326
|
-
Flawed](https://
|
326
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
327
327
|
|
328
328
|
Args:
|
329
329
|
noise_cfg (`torch.Tensor`):
|
@@ -346,11 +346,13 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
346
346
|
|
347
347
|
|
348
348
|
class TextToVideoZeroSDXLPipeline(
|
349
|
+
DeprecatedPipelineMixin,
|
349
350
|
DiffusionPipeline,
|
350
351
|
StableDiffusionMixin,
|
351
352
|
StableDiffusionXLLoraLoaderMixin,
|
352
353
|
TextualInversionLoaderMixin,
|
353
354
|
):
|
355
|
+
_last_supported_version = "0.33.1"
|
354
356
|
r"""
|
355
357
|
Pipeline for zero-shot text-to-video generation using Stable Diffusion XL.
|
356
358
|
|
@@ -439,7 +441,7 @@ class TextToVideoZeroSDXLPipeline(
|
|
439
441
|
def prepare_extra_step_kwargs(self, generator, eta):
|
440
442
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
441
443
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
442
|
-
# eta corresponds to η in DDIM paper: https://
|
444
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
443
445
|
# and should be between [0, 1]
|
444
446
|
|
445
447
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -929,7 +931,7 @@ class TextToVideoZeroSDXLPipeline(
|
|
929
931
|
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
930
932
|
|
931
933
|
if do_classifier_free_guidance and guidance_rescale > 0.0:
|
932
|
-
# Based on 3.4. in https://
|
934
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
933
935
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
|
934
936
|
|
935
937
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1009,11 +1011,11 @@ class TextToVideoZeroSDXLPipeline(
|
|
1009
1011
|
"Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
1010
1012
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
|
1011
1013
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
1012
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
1013
|
-
`guidance_scale` is defined as `w` of equation 2.
|
1014
|
-
Paper](https://
|
1015
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
1016
|
-
usually at the expense of lower image quality.
|
1014
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
1015
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
1016
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
1017
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
1018
|
+
the text `prompt`, usually at the expense of lower image quality.
|
1017
1019
|
negative_prompt (`str` or `List[str]`, *optional*):
|
1018
1020
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
1019
1021
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -1024,8 +1026,8 @@ class TextToVideoZeroSDXLPipeline(
|
|
1024
1026
|
num_videos_per_prompt (`int`, *optional*, defaults to 1):
|
1025
1027
|
The number of videos to generate per prompt.
|
1026
1028
|
eta (`float`, *optional*, defaults to 0.0):
|
1027
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
1028
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
1029
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
1030
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
1029
1031
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
1030
1032
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
1031
1033
|
to make generation deterministic.
|
@@ -1051,11 +1053,11 @@ class TextToVideoZeroSDXLPipeline(
|
|
1051
1053
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
1052
1054
|
tensor will be generated by sampling using the supplied random `generator`.
|
1053
1055
|
motion_field_strength_x (`float`, *optional*, defaults to 12):
|
1054
|
-
Strength of motion in generated video along x-axis. See the
|
1055
|
-
Sect. 3.3.1.
|
1056
|
+
Strength of motion in generated video along x-axis. See the
|
1057
|
+
[paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
|
1056
1058
|
motion_field_strength_y (`float`, *optional*, defaults to 12):
|
1057
|
-
Strength of motion in generated video along y-axis. See the
|
1058
|
-
Sect. 3.3.1.
|
1059
|
+
Strength of motion in generated video along y-axis. See the
|
1060
|
+
[paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
|
1059
1061
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
1060
1062
|
The output format of the generated image. Choose between
|
1061
1063
|
[PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
|
@@ -1074,9 +1076,10 @@ class TextToVideoZeroSDXLPipeline(
|
|
1074
1076
|
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
1075
1077
|
guidance_rescale (`float`, *optional*, defaults to 0.7):
|
1076
1078
|
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
1077
|
-
Flawed](https://
|
1078
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
1079
|
-
Guidance rescale factor should fix overexposure when
|
1079
|
+
Flawed](https://huggingface.co/papers/2305.08891). `guidance_rescale` is defined as `φ` in equation 16 of
|
1080
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
1081
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
1082
|
+
using zero terminal SNR.
|
1080
1083
|
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
1081
1084
|
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
1082
1085
|
`original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
|
@@ -1093,10 +1096,10 @@ class TextToVideoZeroSDXLPipeline(
|
|
1093
1096
|
section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
1094
1097
|
t0 (`int`, *optional*, defaults to 44):
|
1095
1098
|
Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the
|
1096
|
-
[paper](https://
|
1099
|
+
[paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
|
1097
1100
|
t1 (`int`, *optional*, defaults to 47):
|
1098
1101
|
Timestep t1. Should be in the range [t0 + 1, num_inference_steps - 1]. See the
|
1099
|
-
[paper](https://
|
1102
|
+
[paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
|
1100
1103
|
|
1101
1104
|
Returns:
|
1102
1105
|
[`~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput`] or
|
@@ -1153,7 +1156,7 @@ class TextToVideoZeroSDXLPipeline(
|
|
1153
1156
|
)
|
1154
1157
|
device = self._execution_device
|
1155
1158
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
1156
|
-
# of the Imagen paper: https://
|
1159
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
1157
1160
|
# corresponds to doing no classifier free guidance.
|
1158
1161
|
do_classifier_free_guidance = guidance_scale > 1.0
|
1159
1162
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Kakao Brain and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -24,7 +24,7 @@ from ...models import PriorTransformer, UNet2DConditionModel, UNet2DModel
|
|
24
24
|
from ...schedulers import UnCLIPScheduler
|
25
25
|
from ...utils import is_torch_xla_available, logging
|
26
26
|
from ...utils.torch_utils import randn_tensor
|
27
|
-
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
27
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
28
28
|
from .text_proj import UnCLIPTextProjModel
|
29
29
|
|
30
30
|
|
@@ -38,7 +38,7 @@ else:
|
|
38
38
|
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
39
39
|
|
40
40
|
|
41
|
-
class UnCLIPPipeline(DiffusionPipeline):
|
41
|
+
class UnCLIPPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
42
42
|
"""
|
43
43
|
Pipeline for text-to-image generation using unCLIP.
|
44
44
|
|
@@ -69,6 +69,7 @@ class UnCLIPPipeline(DiffusionPipeline):
|
|
69
69
|
|
70
70
|
"""
|
71
71
|
|
72
|
+
_last_supported_version = "0.33.1"
|
72
73
|
_exclude_from_cpu_offload = ["prior"]
|
73
74
|
|
74
75
|
prior: PriorTransformer
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Kakao Brain and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -29,7 +29,7 @@ from ...models import UNet2DConditionModel, UNet2DModel
|
|
29
29
|
from ...schedulers import UnCLIPScheduler
|
30
30
|
from ...utils import is_torch_xla_available, logging
|
31
31
|
from ...utils.torch_utils import randn_tensor
|
32
|
-
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
32
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
33
33
|
from .text_proj import UnCLIPTextProjModel
|
34
34
|
|
35
35
|
|
@@ -43,7 +43,7 @@ else:
|
|
43
43
|
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
44
44
|
|
45
45
|
|
46
|
-
class UnCLIPImageVariationPipeline(DiffusionPipeline):
|
46
|
+
class UnCLIPImageVariationPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
47
47
|
"""
|
48
48
|
Pipeline to generate image variations from an input image using UnCLIP.
|
49
49
|
|
@@ -73,6 +73,7 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
|
|
73
73
|
Scheduler used in the super resolution denoising process (a modified [`DDPMScheduler`]).
|
74
74
|
"""
|
75
75
|
|
76
|
+
_last_supported_version = "0.33.1"
|
76
77
|
decoder: UNet2DConditionModel
|
77
78
|
text_proj: UnCLIPTextProjModel
|
78
79
|
text_encoder: CLIPTextModelWithProjection
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Kakao Brain and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -24,7 +24,7 @@ class UnCLIPTextProjModel(ModelMixin, ConfigMixin):
|
|
24
24
|
Utility class for CLIP embeddings. Used to combine the image and text embeddings into a format usable by the
|
25
25
|
decoder.
|
26
26
|
|
27
|
-
For more details, see the original paper: https://
|
27
|
+
For more details, see the original paper: https://huggingface.co/papers/2204.06125 section 2.1
|
28
28
|
"""
|
29
29
|
|
30
30
|
@register_to_config
|
@@ -13,7 +13,7 @@ from ...models import ModelMixin
|
|
13
13
|
# Modified from ClipCaptionModel in https://github.com/thu-ml/unidiffuser/blob/main/libs/caption_decoder.py
|
14
14
|
class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin):
|
15
15
|
"""
|
16
|
-
Text decoder model for a image-text [UniDiffuser](https://
|
16
|
+
Text decoder model for an image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is used to
|
17
17
|
generate text from the UniDiffuser image-text embedding.
|
18
18
|
|
19
19
|
Parameters:
|
@@ -140,7 +140,7 @@ class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin):
|
|
140
140
|
input_ids (`torch.Tensor` of shape `(N, max_seq_len)`):
|
141
141
|
Text tokens to use for inference.
|
142
142
|
prefix_embeds (`torch.Tensor` of shape `(N, prefix_length, 768)`):
|
143
|
-
Prefix embedding to
|
143
|
+
Prefix embedding to prepend to the embedded tokens.
|
144
144
|
attention_mask (`torch.Tensor` of shape `(N, prefix_length + max_seq_len, 768)`, *optional*):
|
145
145
|
Attention mask for the prefix embedding.
|
146
146
|
labels (`torch.Tensor`, *optional*):
|
@@ -832,7 +832,7 @@ class UTransformer2DModel(ModelMixin, ConfigMixin):
|
|
832
832
|
|
833
833
|
class UniDiffuserModel(ModelMixin, ConfigMixin):
|
834
834
|
"""
|
835
|
-
Transformer model for a image-text [UniDiffuser](https://
|
835
|
+
Transformer model for an image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is a
|
836
836
|
modification of [`UTransformer2DModel`] with input and output heads for the VAE-embedded latent image, the
|
837
837
|
CLIP-embedded image, and the CLIP-embedded prompt (see paper for more details).
|
838
838
|
|
@@ -28,7 +28,7 @@ from ...utils import (
|
|
28
28
|
)
|
29
29
|
from ...utils.outputs import BaseOutput
|
30
30
|
from ...utils.torch_utils import randn_tensor
|
31
|
-
from ..pipeline_utils import DiffusionPipeline
|
31
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline
|
32
32
|
from .modeling_text_decoder import UniDiffuserTextDecoder
|
33
33
|
from .modeling_uvit import UniDiffuserModel
|
34
34
|
|
@@ -62,7 +62,7 @@ class ImageTextPipelineOutput(BaseOutput):
|
|
62
62
|
text: Optional[Union[List[str], List[List[str]]]]
|
63
63
|
|
64
64
|
|
65
|
-
class UniDiffuserPipeline(DiffusionPipeline):
|
65
|
+
class UniDiffuserPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
66
66
|
r"""
|
67
67
|
Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned
|
68
68
|
image generation, image-conditioned text generation, and joint image-text generation.
|
@@ -96,6 +96,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
|
|
96
96
|
original UniDiffuser paper uses the [`DPMSolverMultistepScheduler`] scheduler.
|
97
97
|
"""
|
98
98
|
|
99
|
+
_last_supported_version = "0.33.1"
|
99
100
|
# TODO: support for moving submodules for components with enable_model_cpu_offload
|
100
101
|
model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae->text_decoder"
|
101
102
|
|
@@ -153,7 +154,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
|
|
153
154
|
def prepare_extra_step_kwargs(self, generator, eta):
|
154
155
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
155
156
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
156
|
-
# eta corresponds to η in DDIM paper: https://
|
157
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
157
158
|
# and should be between [0, 1]
|
158
159
|
|
159
160
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -803,7 +804,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
|
|
803
804
|
|
804
805
|
def _combine(self, img_vae, img_clip):
|
805
806
|
r"""
|
806
|
-
Combines a latent
|
807
|
+
Combines a latent image img_vae of shape (B, C, H, W) and a CLIP-embedded image img_clip of shape (B, 1,
|
807
808
|
clip_img_dim) into a single tensor of shape (B, C * H * W + clip_img_dim).
|
808
809
|
"""
|
809
810
|
img_vae = torch.reshape(img_vae, (img_vae.shape[0], -1))
|
@@ -1154,8 +1155,8 @@ class UniDiffuserPipeline(DiffusionPipeline):
|
|
1154
1155
|
`text` mode. If the mode is joint and both `num_images_per_prompt` and `num_prompts_per_image` are
|
1155
1156
|
supplied, `min(num_images_per_prompt, num_prompts_per_image)` samples are generated.
|
1156
1157
|
eta (`float`, *optional*, defaults to 0.0):
|
1157
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
1158
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
1158
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
1159
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
1159
1160
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
1160
1161
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
1161
1162
|
generation deterministic.
|
@@ -1243,7 +1244,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
|
|
1243
1244
|
reduce_text_emb_dim = self.text_intermediate_dim < self.text_encoder_hidden_size or self.mode != "text2img"
|
1244
1245
|
|
1245
1246
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
1246
|
-
# of the Imagen paper: https://
|
1247
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
1247
1248
|
# corresponds to doing no classifier free guidance.
|
1248
1249
|
# Note that this differs from the formulation in the unidiffusers paper!
|
1249
1250
|
do_classifier_free_guidance = guidance_scale > 1.0
|
@@ -0,0 +1,52 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
from ...utils import (
|
4
|
+
DIFFUSERS_SLOW_IMPORT,
|
5
|
+
OptionalDependencyNotAvailable,
|
6
|
+
_LazyModule,
|
7
|
+
get_objects_from_module,
|
8
|
+
is_torch_available,
|
9
|
+
is_transformers_available,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
_dummy_objects = {}
|
14
|
+
_import_structure = {}
|
15
|
+
|
16
|
+
|
17
|
+
try:
|
18
|
+
if not (is_transformers_available() and is_torch_available()):
|
19
|
+
raise OptionalDependencyNotAvailable()
|
20
|
+
except OptionalDependencyNotAvailable:
|
21
|
+
from ...utils import dummy_torch_and_transformers_objects # noqa F403
|
22
|
+
|
23
|
+
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
24
|
+
else:
|
25
|
+
_import_structure["pipeline_visualcloze_combined"] = ["VisualClozePipeline"]
|
26
|
+
_import_structure["pipeline_visualcloze_generation"] = ["VisualClozeGenerationPipeline"]
|
27
|
+
|
28
|
+
|
29
|
+
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
30
|
+
try:
|
31
|
+
if not (is_transformers_available() and is_torch_available()):
|
32
|
+
raise OptionalDependencyNotAvailable()
|
33
|
+
|
34
|
+
except OptionalDependencyNotAvailable:
|
35
|
+
from ...utils.dummy_torch_and_transformers_objects import *
|
36
|
+
else:
|
37
|
+
from .pipeline_visualcloze_combined import VisualClozePipeline
|
38
|
+
from .pipeline_visualcloze_generation import VisualClozeGenerationPipeline
|
39
|
+
|
40
|
+
|
41
|
+
else:
|
42
|
+
import sys
|
43
|
+
|
44
|
+
sys.modules[__name__] = _LazyModule(
|
45
|
+
__name__,
|
46
|
+
globals()["__file__"],
|
47
|
+
_import_structure,
|
48
|
+
module_spec=__spec__,
|
49
|
+
)
|
50
|
+
|
51
|
+
for name, value in _dummy_objects.items():
|
52
|
+
setattr(sys.modules[__name__], name, value)
|