diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +145 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +3 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +2 -2
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +3 -3
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +9 -8
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +332 -227
- diffusers/hooks/hooks.py +58 -3
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +5 -10
- diffusers/hooks/pyramid_attention_broadcast.py +15 -12
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +10 -0
- diffusers/loaders/ip_adapter.py +260 -18
- diffusers/loaders/lora_base.py +261 -127
- diffusers/loaders/lora_conversion_utils.py +657 -35
- diffusers/loaders/lora_pipeline.py +2778 -1246
- diffusers/loaders/peft.py +78 -112
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +64 -15
- diffusers/loaders/single_file_utils.py +395 -7
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +10 -11
- diffusers/loaders/transformer_sd3.py +8 -3
- diffusers/loaders/unet.py +24 -21
- diffusers/loaders/unet_loader_utils.py +6 -3
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +23 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +488 -7
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +113 -667
- diffusers/models/auto_model.py +49 -12
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +17 -4
- diffusers/models/autoencoders/autoencoder_kl.py +5 -5
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +32 -10
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +21 -20
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +5 -5
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +36 -46
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +203 -108
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +7 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +641 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +353 -27
- diffusers/models/transformers/transformer_cosmos.py +586 -0
- diffusers/models/transformers/transformer_flux.py +376 -138
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +105 -24
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +316 -87
- diffusers/models/transformers/transformer_wan_vace.py +387 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +4 -3
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +68 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +23 -20
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +4 -2
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +37 -36
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
- diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +5 -6
- diffusers/pipelines/pipeline_loading_utils.py +113 -15
- diffusers/pipelines/pipeline_utils.py +127 -48
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +91 -30
- diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
- diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +3 -1
- diffusers/quantizers/base.py +17 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +108 -16
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +16 -9
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -2
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
- diffusers/schedulers/scheduling_utils.py +3 -3
- diffusers/schedulers/scheduling_utils_flax.py +2 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +91 -5
- diffusers/utils/__init__.py +15 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/constants.py +4 -0
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +432 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
- diffusers/utils/dynamic_modules_utils.py +85 -8
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +151 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +96 -10
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +195 -17
- diffusers/utils/torch_utils.py +43 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
- diffusers-0.35.0.dist-info/RECORD +703 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/visualcloze/visualcloze_utils.py (new file)
@@ -0,0 +1,251 @@
+# Copyright 2025 VisualCloze team and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, List, Optional, Tuple, Union
+
+import torch
+from PIL import Image
+
+from ...image_processor import VaeImageProcessor
+
+
+class VisualClozeProcessor(VaeImageProcessor):
+    """
+    Image processor for the VisualCloze pipeline.
+
+    This processor handles the preprocessing of images for visual cloze tasks, including resizing, normalization, and
+    mask generation.
+
+    Args:
+        resolution (int, optional):
+            Target resolution for processing images. Each image will be resized to this resolution before being
+            concatenated to avoid the out-of-memory error. Defaults to 384.
+        *args: Additional arguments passed to [~image_processor.VaeImageProcessor]
+        **kwargs: Additional keyword arguments passed to [~image_processor.VaeImageProcessor]
+    """
+
+    def __init__(self, *args, resolution: int = 384, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.resolution = resolution
+
+    def preprocess_image(
+        self, input_images: List[List[Optional[Image.Image]]], vae_scale_factor: int
+    ) -> Tuple[List[List[torch.Tensor]], List[List[List[int]]], List[int]]:
+        """
+        Preprocesses input images for the VisualCloze pipeline.
+
+        This function handles the preprocessing of input images by:
+        1. Resizing and cropping images to maintain consistent dimensions
+        2. Converting images to the Tensor format for the VAE
+        3. Normalizing pixel values
+        4. Tracking image sizes and positions of target images
+
+        Args:
+            input_images (List[List[Optional[Image.Image]]]):
+                A nested list of PIL Images where:
+                - Outer list represents different samples, including in-context examples and the query
+                - Inner list contains images for the task
+                - In the last row, condition images are provided and the target images are placed as None
+            vae_scale_factor (int):
+                The scale factor used by the VAE for resizing images
+
+        Returns:
+            Tuple containing:
+            - List[List[torch.Tensor]]: Preprocessed images in tensor format
+            - List[List[List[int]]]: Dimensions of each processed image [height, width]
+            - List[int]: Target positions indicating which images are to be generated
+        """
+        n_samples, n_task_images = len(input_images), len(input_images[0])
+        divisible = 2 * vae_scale_factor
+
+        processed_images: List[List[Image.Image]] = [[] for _ in range(n_samples)]
+        resize_size: List[Optional[Tuple[int, int]]] = [None for _ in range(n_samples)]
+        target_position: List[int] = []
+
+        # Process each sample
+        for i in range(n_samples):
+            # Determine size from first non-None image
+            for j in range(n_task_images):
+                if input_images[i][j] is not None:
+                    aspect_ratio = input_images[i][j].width / input_images[i][j].height
+                    target_area = self.resolution * self.resolution
+                    new_h = int((target_area / aspect_ratio) ** 0.5)
+                    new_w = int(new_h * aspect_ratio)
+
+                    new_w = max(new_w // divisible, 1) * divisible
+                    new_h = max(new_h // divisible, 1) * divisible
+                    resize_size[i] = (new_w, new_h)
+                    break
+
+            # Process all images in the sample
+            for j in range(n_task_images):
+                if input_images[i][j] is not None:
+                    target = self._resize_and_crop(input_images[i][j], resize_size[i][0], resize_size[i][1])
+                    processed_images[i].append(target)
+                    if i == n_samples - 1:
+                        target_position.append(0)
+                else:
+                    blank = Image.new("RGB", resize_size[i] or (self.resolution, self.resolution), (0, 0, 0))
+                    processed_images[i].append(blank)
+                    if i == n_samples - 1:
+                        target_position.append(1)
+
+        # Ensure consistent width when there are multiple target images
+        if len(target_position) > 1 and sum(target_position) > 1:
+            new_w = resize_size[n_samples - 1][0] or 384
+            for i in range(len(processed_images)):
+                for j in range(len(processed_images[i])):
+                    if processed_images[i][j] is not None:
+                        new_h = int(processed_images[i][j].height * (new_w / processed_images[i][j].width))
+                        new_w = int(new_w / 16) * 16
+                        new_h = int(new_h / 16) * 16
+                        processed_images[i][j] = self.resize(processed_images[i][j], new_h, new_w)
+
+        # Convert to tensors and normalize
+        image_sizes = []
+        for i in range(len(processed_images)):
+            image_sizes.append([[img.height, img.width] for img in processed_images[i]])
+            for j, image in enumerate(processed_images[i]):
+                image = self.pil_to_numpy(image)
+                image = self.numpy_to_pt(image)
+                image = self.normalize(image)
+                processed_images[i][j] = image
+
+        return processed_images, image_sizes, target_position
+
+    def preprocess_mask(
+        self, input_images: List[List[Image.Image]], target_position: List[int]
+    ) -> List[List[torch.Tensor]]:
+        """
+        Generate masks for the VisualCloze pipeline.
+
+        Args:
+            input_images (List[List[Image.Image]]):
+                Processed images from preprocess_image
+            target_position (List[int]):
+                Binary list marking the positions of target images (1 for target, 0 for condition)
+
+        Returns:
+            List[List[torch.Tensor]]:
+                A nested list of mask tensors (1 for target positions, 0 for condition images)
+        """
+        mask = []
+        for i, row in enumerate(input_images):
+            if i == len(input_images) - 1:  # Query row
+                row_masks = [
+                    torch.full((1, 1, row[0].shape[2], row[0].shape[3]), fill_value=m) for m in target_position
+                ]
+            else:  # In-context examples
+                row_masks = [
+                    torch.full((1, 1, row[0].shape[2], row[0].shape[3]), fill_value=0) for _ in target_position
+                ]
+            mask.append(row_masks)
+        return mask
+
+    def preprocess_image_upsampling(
+        self,
+        input_images: List[List[Image.Image]],
+        height: int,
+        width: int,
+    ) -> Tuple[List[List[Image.Image]], List[List[List[int]]]]:
+        """Process images for the upsampling stage in the VisualCloze pipeline.
+
+        Args:
+            input_images: Input image to process
+            height: Target height
+            width: Target width
+
+        Returns:
+            Tuple of processed image and its size
+        """
+        image = self.resize(input_images[0][0], height, width)
+        image = self.pil_to_numpy(image)  # to np
+        image = self.numpy_to_pt(image)  # to pt
+        image = self.normalize(image)
+
+        input_images[0][0] = image
+        image_sizes = [[[height, width]]]
+        return input_images, image_sizes
+
+    def preprocess_mask_upsampling(self, input_images: List[List[Image.Image]]) -> List[List[torch.Tensor]]:
+        return [[torch.ones((1, 1, input_images[0][0].shape[2], input_images[0][0].shape[3]))]]
+
+    def get_layout_prompt(self, size: Tuple[int, int]) -> str:
+        layout_instruction = (
+            f"A grid layout with {size[0]} rows and {size[1]} columns, displaying {size[0] * size[1]} images arranged side by side."
+        )
+        return layout_instruction
+
+    def preprocess(
+        self,
+        task_prompt: Union[str, List[str]],
+        content_prompt: Union[str, List[str]],
+        input_images: Optional[List[List[List[Optional[str]]]]] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
+        upsampling: bool = False,
+        vae_scale_factor: int = 16,
+    ) -> Dict:
+        """Process visual cloze inputs.
+
+        Args:
+            task_prompt: Task description(s)
+            content_prompt: Content description(s)
+            input_images: List of images or None for the target images
+            height: Optional target height for upsampling stage
+            width: Optional target width for upsampling stage
+            upsampling: Whether this is in the upsampling processing stage
+
+        Returns:
+            Dictionary containing processed images, masks, prompts and metadata
+        """
+        if isinstance(task_prompt, str):
+            task_prompt = [task_prompt]
+            content_prompt = [content_prompt]
+            input_images = [input_images]
+
+        output = {
+            "init_image": [],
+            "mask": [],
+            "task_prompt": task_prompt if not upsampling else [None for _ in range(len(task_prompt))],
+            "content_prompt": content_prompt,
+            "layout_prompt": [],
+            "target_position": [],
+            "image_size": [],
+        }
+        for i in range(len(task_prompt)):
+            if upsampling:
+                layout_prompt = None
+            else:
+                layout_prompt = self.get_layout_prompt((len(input_images[i]), len(input_images[i][0])))
+
+            if upsampling:
+                cur_processed_images, cur_image_size = self.preprocess_image_upsampling(
+                    input_images[i], height=height, width=width
+                )
+                cur_mask = self.preprocess_mask_upsampling(cur_processed_images)
+            else:
+                cur_processed_images, cur_image_size, cur_target_position = self.preprocess_image(
+                    input_images[i], vae_scale_factor=vae_scale_factor
+                )
+                cur_mask = self.preprocess_mask(cur_processed_images, cur_target_position)
+
+                output["target_position"].append(cur_target_position)
+
+            output["image_size"].append(cur_image_size)
+            output["init_image"].append(cur_processed_images)
+            output["mask"].append(cur_mask)
+            output["layout_prompt"].append(layout_prompt)
+
+        return output
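As a reading aid for `preprocess_image` above: the sizing rule fits each image to roughly `resolution**2` pixels while keeping its aspect ratio, then snaps both sides down to a multiple of `2 * vae_scale_factor`. A minimal standalone sketch (the helper name `fit_to_area` and the defaults are ours for illustration, not part of the package):

```python
def fit_to_area(width: int, height: int, resolution: int = 384, vae_scale_factor: int = 8):
    """Resize to ~resolution**2 pixels, preserving aspect ratio, snapped to 2*vae_scale_factor."""
    divisible = 2 * vae_scale_factor
    aspect_ratio = width / height
    target_area = resolution * resolution
    new_h = int((target_area / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)
    return max(new_w // divisible, 1) * divisible, max(new_h // divisible, 1) * divisible

print(fit_to_area(1024, 768))  # (432, 320): ~384*384 pixels, near 4:3, both multiples of 16
```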
diffusers/pipelines/wan/__init__.py
@@ -24,6 +24,7 @@ except OptionalDependencyNotAvailable:
 else:
     _import_structure["pipeline_wan"] = ["WanPipeline"]
     _import_structure["pipeline_wan_i2v"] = ["WanImageToVideoPipeline"]
+    _import_structure["pipeline_wan_vace"] = ["WanVACEPipeline"]
     _import_structure["pipeline_wan_video2video"] = ["WanVideoToVideoPipeline"]
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -35,6 +36,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     else:
         from .pipeline_wan import WanPipeline
         from .pipeline_wan_i2v import WanImageToVideoPipeline
+        from .pipeline_wan_vace import WanVACEPipeline
         from .pipeline_wan_video2video import WanVideoToVideoPipeline
 
 else:
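With the two registrations above in place, the new pipeline should be importable both lazily from the package root and directly from the subpackage. A sketch, assuming torch and transformers are installed so the real class (not a dummy placeholder object) is loaded:

```python
from diffusers import WanVACEPipeline            # lazy import via the package root
from diffusers.pipelines.wan import WanVACEPipeline  # equivalent direct import
```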
diffusers/pipelines/wan/pipeline_wan.py
@@ -112,18 +112,31 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
         vae ([`AutoencoderKLWan`]):
             Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.
+        transformer_2 ([`WanTransformer3DModel`], *optional*):
+            Conditional Transformer to denoise the input latents during the low-noise stage. If provided, enables
+            two-stage denoising where `transformer` handles high-noise stages and `transformer_2` handles low-noise
+            stages. If not provided, only `transformer` is used.
+        boundary_ratio (`float`, *optional*, defaults to `None`):
+            Ratio of total timesteps to use as the boundary for switching between transformers in two-stage denoising.
+            The actual boundary timestep is calculated as `boundary_ratio * num_train_timesteps`. When provided,
+            `transformer` handles timesteps >= boundary_timestep and `transformer_2` handles timesteps <
+            boundary_timestep. If `None`, only `transformer` is used for the entire denoising process.
     """
 
-    model_cpu_offload_seq = "text_encoder->transformer->vae"
+    model_cpu_offload_seq = "text_encoder->transformer->transformer_2->vae"
     _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
+    _optional_components = ["transformer", "transformer_2"]
 
     def __init__(
         self,
         tokenizer: AutoTokenizer,
         text_encoder: UMT5EncoderModel,
-        transformer: WanTransformer3DModel,
         vae: AutoencoderKLWan,
         scheduler: FlowMatchEulerDiscreteScheduler,
+        transformer: Optional[WanTransformer3DModel] = None,
+        transformer_2: Optional[WanTransformer3DModel] = None,
+        boundary_ratio: Optional[float] = None,
+        expand_timesteps: bool = False,  # Wan2.2 ti2v
     ):
         super().__init__()
 
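The docstring above pins the switch point at `boundary_ratio * num_train_timesteps`. A minimal sketch of that rule, assuming the scheduler's usual `num_train_timesteps` of 1000 and an illustrative `boundary_ratio` of 0.875:

```python
num_train_timesteps = 1000  # typical scheduler config value (assumption)
boundary_ratio = 0.875      # illustrative value for a two-stage checkpoint
boundary_timestep = boundary_ratio * num_train_timesteps  # 875.0

for t in (999, 875, 874, 500):
    stage = "transformer (high-noise)" if t >= boundary_timestep else "transformer_2 (low-noise)"
    print(t, "->", stage)
# 999 -> transformer, 875 -> transformer, 874 -> transformer_2, 500 -> transformer_2
```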
@@ -133,10 +146,12 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             tokenizer=tokenizer,
             transformer=transformer,
             scheduler=scheduler,
+            transformer_2=transformer_2,
         )
-
-        self.
-        self.
+        self.register_to_config(boundary_ratio=boundary_ratio)
+        self.register_to_config(expand_timesteps=expand_timesteps)
+        self.vae_scale_factor_temporal = self.vae.config.scale_factor_temporal if getattr(self, "vae", None) else 4
+        self.vae_scale_factor_spatial = self.vae.config.scale_factor_spatial if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
 
     def _get_t5_prompt_embeds(
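Because `boundary_ratio` and `expand_timesteps` are registered to the config above, they surface on `pipe.config` after loading. A hedged loading sketch; the checkpoint id is illustrative, and any Wan 2.2 two-transformer repo in the Diffusers layout should behave the same:

```python
import torch
from diffusers import WanPipeline

pipe = WanPipeline.from_pretrained("Wan-AI/Wan2.2-T2V-A14B-Diffusers", torch_dtype=torch.bfloat16)
print(pipe.config.boundary_ratio)      # e.g. 0.875 for a two-stage checkpoint
print(pipe.transformer_2 is not None)  # True when the repo ships a low-noise transformer
```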
@@ -270,6 +285,7 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
         prompt_embeds=None,
         negative_prompt_embeds=None,
         callback_on_step_end_tensor_inputs=None,
+        guidance_scale_2=None,
     ):
         if height % 16 != 0 or width % 16 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 16 but are {height} and {width}.")
@@ -302,6 +318,9 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
         ):
             raise ValueError(f"`negative_prompt` has to be of type `str` or `list` but is {type(negative_prompt)}")
 
+        if self.config.boundary_ratio is None and guidance_scale_2 is not None:
+            raise ValueError("`guidance_scale_2` is only supported when the pipeline's `boundary_ratio` is not None.")
+
     def prepare_latents(
         self,
         batch_size: int,
@@ -369,6 +388,7 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
         num_frames: int = 81,
         num_inference_steps: int = 50,
         guidance_scale: float = 5.0,
+        guidance_scale_2: Optional[float] = None,
         num_videos_per_prompt: Optional[int] = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
         latents: Optional[torch.Tensor] = None,
@@ -388,8 +408,10 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
 
         Args:
             prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts to guide the image generation. If not defined,
-
+                The prompt or prompts to guide the image generation. If not defined, pass `prompt_embeds` instead.
+            negative_prompt (`str` or `List[str]`, *optional*):
+                The prompt or prompts to avoid during image generation. If not defined, pass `negative_prompt_embeds`
+                instead. Ignored when not using guidance (`guidance_scale` < `1`).
             height (`int`, defaults to `480`):
                 The height in pixels of the generated image.
             width (`int`, defaults to `832`):
@@ -400,11 +422,15 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, defaults to `5.0`):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                `guidance_scale` is defined as `w` of equation 2.
-                Paper](https://
-                1`. Higher guidance scale encourages to generate images that are closely linked to
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
+            guidance_scale_2 (`float`, *optional*, defaults to `None`):
+                Guidance scale for the low-noise stage transformer (`transformer_2`). If `None` and the pipeline's
+                `boundary_ratio` is not None, uses the same value as `guidance_scale`. Only used when `transformer_2`
+                and the pipeline's `boundary_ratio` are not None.
             num_videos_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -417,7 +443,7 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            output_type (`str`, *optional*, defaults to `"
+            output_type (`str`, *optional*, defaults to `"np"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`WanPipelineOutput`] instead of a plain tuple.
@@ -434,8 +460,9 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                 `._callback_tensor_inputs` attribute of your pipeline class.
-
-                The
+            max_sequence_length (`int`, defaults to `512`):
+                The maximum sequence length of the text encoder. If the prompt is longer than this, it will be
+                truncated. If the prompt is shorter, it will be padded to this length.
 
         Examples:
 
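A hedged call sketch for the `guidance_scale_2` knob documented above, continuing the loading sketch from earlier; the prompt and both scale values are illustrative, not recommended defaults:

```python
video = pipe(
    prompt="A cat walks on the grass, realistic style",
    height=480,
    width=832,
    num_frames=81,
    guidance_scale=4.0,    # applied while `transformer` runs (high-noise steps)
    guidance_scale_2=3.0,  # applied while `transformer_2` runs (low-noise steps)
).frames[0]
```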
@@ -458,6 +485,7 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             prompt_embeds,
             negative_prompt_embeds,
             callback_on_step_end_tensor_inputs,
+            guidance_scale_2,
         )
 
         if num_frames % self.vae_scale_factor_temporal != 1:
@@ -467,7 +495,11 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
         num_frames = max(num_frames, 1)
 
+        if self.config.boundary_ratio is not None and guidance_scale_2 is None:
+            guidance_scale_2 = guidance_scale
+
         self._guidance_scale = guidance_scale
+        self._guidance_scale_2 = guidance_scale_2
         self._attention_kwargs = attention_kwargs
         self._current_timestep = None
         self._interrupt = False
@@ -494,7 +526,7 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             device=device,
         )
 
-        transformer_dtype = self.transformer.dtype
+        transformer_dtype = self.transformer.dtype if self.transformer is not None else self.transformer_2.dtype
         prompt_embeds = prompt_embeds.to(transformer_dtype)
         if negative_prompt_embeds is not None:
             negative_prompt_embeds = negative_prompt_embeds.to(transformer_dtype)
@@ -504,7 +536,11 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
         timesteps = self.scheduler.timesteps
 
         # 5. Prepare latent variables
-        num_channels_latents =
+        num_channels_latents = (
+            self.transformer.config.in_channels
+            if self.transformer is not None
+            else self.transformer_2.config.in_channels
+        )
         latents = self.prepare_latents(
             batch_size * num_videos_per_prompt,
             num_channels_latents,
@@ -517,36 +553,61 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             latents,
         )
 
+        mask = torch.ones(latents.shape, dtype=torch.float32, device=device)
+
         # 6. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         self._num_timesteps = len(timesteps)
 
+        if self.config.boundary_ratio is not None:
+            boundary_timestep = self.config.boundary_ratio * self.scheduler.config.num_train_timesteps
+        else:
+            boundary_timestep = None
+
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
                 if self.interrupt:
                     continue
 
                 self._current_timestep = t
-                latent_model_input = latents.to(transformer_dtype)
-                timestep = t.expand(latents.shape[0])
 
-
-
-
-
-
-
-
+                if boundary_timestep is None or t >= boundary_timestep:
+                    # wan2.1 or high-noise stage in wan2.2
+                    current_model = self.transformer
+                    current_guidance_scale = guidance_scale
+                else:
+                    # low-noise stage in wan2.2
+                    current_model = self.transformer_2
+                    current_guidance_scale = guidance_scale_2
 
-
-
+                latent_model_input = latents.to(transformer_dtype)
+                if self.config.expand_timesteps:
+                    # seq_len: num_latent_frames * latent_height//2 * latent_width//2
+                    temp_ts = (mask[0][0][:, ::2, ::2] * t).flatten()
+                    # batch_size, seq_len
+                    timestep = temp_ts.unsqueeze(0).expand(latents.shape[0], -1)
+                else:
+                    timestep = t.expand(latents.shape[0])
+
+                with current_model.cache_context("cond"):
+                    noise_pred = current_model(
                         hidden_states=latent_model_input,
                         timestep=timestep,
-                        encoder_hidden_states=
+                        encoder_hidden_states=prompt_embeds,
                         attention_kwargs=attention_kwargs,
                         return_dict=False,
                     )[0]
-
+
+                if self.do_classifier_free_guidance:
+                    with current_model.cache_context("uncond"):
+                        noise_uncond = current_model(
+                            hidden_states=latent_model_input,
+                            timestep=timestep,
+                            encoder_hidden_states=negative_prompt_embeds,
+                            attention_kwargs=attention_kwargs,
+                            return_dict=False,
+                        )[0]
+                    noise_pred = noise_uncond + current_guidance_scale * (noise_pred - noise_uncond)
 
                 # compute the previous noisy sample x_t -> x_t-1
                 latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
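Shape sketch for the `expand_timesteps` branch above (the Wan 2.2 ti2v path): with latents of shape `(B, C, F, H, W)`, each step gets a per-token timestep of shape `(B, F * (H//2) * (W//2))` instead of one scalar per batch element. The dimensions below are illustrative:

```python
import torch

B, C, F, H, W = 1, 16, 21, 60, 104   # illustrative latent shape
mask = torch.ones(B, C, F, H, W)      # as prepared before the denoising loop
t = torch.tensor(875.0)

temp_ts = (mask[0][0][:, ::2, ::2] * t).flatten()  # (F * H//2 * W//2,)
timestep = temp_ts.unsqueeze(0).expand(B, -1)      # (B, seq_len)
print(timestep.shape)  # torch.Size([1, 32760]) == (1, 21 * 30 * 52)
```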