diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +17 -12
- diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
- diffusers-0.34.0.dist-info/RECORD +639 -0
- diffusers-0.33.0.dist-info/RECORD +0 -608
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import List, Union
|
3
|
+
|
4
|
+
import numpy as np
|
5
|
+
import PIL.Image
|
6
|
+
|
7
|
+
from ...utils import BaseOutput
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
|
11
|
+
class ChromaPipelineOutput(BaseOutput):
|
12
|
+
"""
|
13
|
+
Output class for Stable Diffusion pipelines.
|
14
|
+
|
15
|
+
Args:
|
16
|
+
images (`List[PIL.Image.Image]` or `np.ndarray`)
|
17
|
+
List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
|
18
|
+
num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
|
19
|
+
"""
|
20
|
+
|
21
|
+
images: Union[List[PIL.Image.Image], np.ndarray]
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -359,7 +359,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
359
359
|
def prepare_extra_step_kwargs(self, generator, eta):
|
360
360
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
361
361
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
362
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
362
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
363
363
|
# and should be between [0, 1]
|
364
364
|
|
365
365
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -558,11 +558,11 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
558
558
|
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
559
559
|
passed will be used. Must be in descending order.
|
560
560
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
561
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
562
|
-
`guidance_scale` is defined as `w` of equation 2.
|
563
|
-
Paper](https://
|
564
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
565
|
-
usually at the expense of lower image quality.
|
561
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
562
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
563
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
564
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
565
|
+
the text `prompt`, usually at the expense of lower image quality.
|
566
566
|
num_videos_per_prompt (`int`, *optional*, defaults to 1):
|
567
567
|
The number of videos to generate per prompt.
|
568
568
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -645,7 +645,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
645
645
|
device = self._execution_device
|
646
646
|
|
647
647
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
648
|
-
# of the Imagen paper: https://
|
648
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
649
649
|
# corresponds to doing no classifier free guidance.
|
650
650
|
do_classifier_free_guidance = guidance_scale > 1.0
|
651
651
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI, Alibaba-PAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -398,7 +398,7 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
398
398
|
def prepare_extra_step_kwargs(self, generator, eta):
|
399
399
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
400
400
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
401
|
-
# eta corresponds to η in DDIM paper: https://
|
401
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
402
402
|
# and should be between [0, 1]
|
403
403
|
|
404
404
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -603,11 +603,11 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
603
603
|
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
604
604
|
passed will be used. Must be in descending order.
|
605
605
|
guidance_scale (`float`, *optional*, defaults to 6.0):
|
606
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
607
|
-
`guidance_scale` is defined as `w` of equation 2.
|
608
|
-
Paper](https://
|
609
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
610
|
-
usually at the expense of lower image quality.
|
606
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
607
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
608
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
609
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
610
|
+
the text `prompt`, usually at the expense of lower image quality.
|
611
611
|
num_videos_per_prompt (`int`, *optional*, defaults to 1):
|
612
612
|
The number of videos to generate per prompt.
|
613
613
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -698,7 +698,7 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
698
698
|
device = self._execution_device
|
699
699
|
|
700
700
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
701
|
-
# of the Imagen paper: https://
|
701
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
702
702
|
# corresponds to doing no classifier free guidance.
|
703
703
|
do_classifier_free_guidance = guidance_scale > 1.0
|
704
704
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -442,7 +442,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
|
|
442
442
|
def prepare_extra_step_kwargs(self, generator, eta):
|
443
443
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
444
444
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
445
|
-
# eta corresponds to η in DDIM paper: https://
|
445
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
446
446
|
# and should be between [0, 1]
|
447
447
|
|
448
448
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -658,11 +658,11 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
|
|
658
658
|
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
659
659
|
passed will be used. Must be in descending order.
|
660
660
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
661
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
662
|
-
`guidance_scale` is defined as `w` of equation 2.
|
663
|
-
Paper](https://
|
664
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
665
|
-
usually at the expense of lower image quality.
|
661
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
662
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
663
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
664
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
665
|
+
the text `prompt`, usually at the expense of lower image quality.
|
666
666
|
num_videos_per_prompt (`int`, *optional*, defaults to 1):
|
667
667
|
The number of videos to generate per prompt.
|
668
668
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -747,7 +747,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
|
|
747
747
|
device = self._execution_device
|
748
748
|
|
749
749
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
750
|
-
# of the Imagen paper: https://
|
750
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
751
751
|
# corresponds to doing no classifier free guidance.
|
752
752
|
do_classifier_free_guidance = guidance_scale > 1.0
|
753
753
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -418,7 +418,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
|
|
418
418
|
def prepare_extra_step_kwargs(self, generator, eta):
|
419
419
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
420
420
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
421
|
-
# eta corresponds to η in DDIM paper: https://
|
421
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
422
422
|
# and should be between [0, 1]
|
423
423
|
|
424
424
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -628,11 +628,11 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
|
|
628
628
|
strength (`float`, *optional*, defaults to 0.8):
|
629
629
|
Higher strength leads to more differences between original video and generated video.
|
630
630
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
631
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
632
|
-
`guidance_scale` is defined as `w` of equation 2.
|
633
|
-
Paper](https://
|
634
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
635
|
-
usually at the expense of lower image quality.
|
631
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
632
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
633
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
634
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
635
|
+
the text `prompt`, usually at the expense of lower image quality.
|
636
636
|
num_videos_per_prompt (`int`, *optional*, defaults to 1):
|
637
637
|
The number of videos to generate per prompt.
|
638
638
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -718,7 +718,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
|
|
718
718
|
device = self._execution_device
|
719
719
|
|
720
720
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
721
|
-
# of the Imagen paper: https://
|
721
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
722
722
|
# corresponds to doing no classifier free guidance.
|
723
723
|
do_classifier_free_guidance = guidance_scale > 1.0
|
724
724
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -319,7 +319,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
|
|
319
319
|
def prepare_extra_step_kwargs(self, generator, eta):
|
320
320
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
321
321
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
322
|
-
# eta corresponds to η in DDIM paper: https://
|
322
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
323
323
|
# and should be between [0, 1]
|
324
324
|
|
325
325
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -390,7 +390,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
|
|
390
390
|
return self._guidance_scale
|
391
391
|
|
392
392
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
393
|
-
# of the Imagen paper: https://
|
393
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
394
394
|
# corresponds to doing no classifier free guidance.
|
395
395
|
@property
|
396
396
|
def do_classifier_free_guidance(self):
|
@@ -453,11 +453,11 @@ class CogView3PlusPipeline(DiffusionPipeline):
|
|
453
453
|
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
454
454
|
passed will be used. Must be in descending order.
|
455
455
|
guidance_scale (`float`, *optional*, defaults to `5.0`):
|
456
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
457
|
-
`guidance_scale` is defined as `w` of equation 2.
|
458
|
-
Paper](https://
|
459
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
460
|
-
usually at the expense of lower image quality.
|
456
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
457
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
458
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
459
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
460
|
+
the text `prompt`, usually at the expense of lower image quality.
|
461
461
|
num_images_per_prompt (`int`, *optional*, defaults to `1`):
|
462
462
|
The number of images to generate per prompt.
|
463
463
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -547,7 +547,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
|
|
547
547
|
device = self._execution_device
|
548
548
|
|
549
549
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
550
|
-
# of the Imagen paper: https://
|
550
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
551
551
|
# corresponds to doing no classifier free guidance.
|
552
552
|
do_classifier_free_guidance = guidance_scale > 1.0
|
553
553
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -377,7 +377,7 @@ class CogView4Pipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
|
|
377
377
|
return self._guidance_scale
|
378
378
|
|
379
379
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
380
|
-
# of the Imagen paper: https://
|
380
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
381
381
|
# corresponds to doing no classifier free guidance.
|
382
382
|
@property
|
383
383
|
def do_classifier_free_guidance(self):
|
@@ -453,11 +453,11 @@ class CogView4Pipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
|
|
453
453
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
454
454
|
will be used.
|
455
455
|
guidance_scale (`float`, *optional*, defaults to `5.0`):
|
456
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
457
|
-
`guidance_scale` is defined as `w` of equation 2.
|
458
|
-
Paper](https://
|
459
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
460
|
-
usually at the expense of lower image quality.
|
456
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
457
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
458
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
459
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
460
|
+
the text `prompt`, usually at the expense of lower image quality.
|
461
461
|
num_images_per_prompt (`int`, *optional*, defaults to `1`):
|
462
462
|
The number of images to generate per prompt.
|
463
463
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -409,7 +409,7 @@ class CogView4ControlPipeline(DiffusionPipeline):
|
|
409
409
|
return self._guidance_scale
|
410
410
|
|
411
411
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
412
|
-
# of the Imagen paper: https://
|
412
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
413
413
|
# corresponds to doing no classifier free guidance.
|
414
414
|
@property
|
415
415
|
def do_classifier_free_guidance(self):
|
@@ -486,11 +486,11 @@ class CogView4ControlPipeline(DiffusionPipeline):
|
|
486
486
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
487
487
|
will be used.
|
488
488
|
guidance_scale (`float`, *optional*, defaults to `5.0`):
|
489
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
490
|
-
`guidance_scale` is defined as `w` of equation 2.
|
491
|
-
Paper](https://
|
492
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
493
|
-
usually at the expense of lower image quality.
|
489
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
490
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
491
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
492
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
493
|
+
the text `prompt`, usually at the expense of lower image quality.
|
494
494
|
num_images_per_prompt (`int`, *optional*, defaults to `1`):
|
495
495
|
The number of images to generate per prompt.
|
496
496
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -166,7 +166,7 @@ def process_face_embeddings(
|
|
166
166
|
raise RuntimeError("facexlib align face fail")
|
167
167
|
align_face = face_helper_1.cropped_faces[0] # (512, 512, 3) # RGB
|
168
168
|
|
169
|
-
#
|
169
|
+
# in case insightface didn't detect face
|
170
170
|
if id_ante_embedding is None:
|
171
171
|
logger.warning("Failed to detect face using insightface. Extracting embedding with align face")
|
172
172
|
id_ante_embedding = face_helper_2.get_feat(align_face)
|
@@ -294,7 +294,7 @@ def prepare_face_models(model_path, device, dtype):
|
|
294
294
|
|
295
295
|
Parameters:
|
296
296
|
- model_path: Path to the directory containing model files.
|
297
|
-
- device: The device (e.g., 'cuda', 'cpu') where models will be loaded.
|
297
|
+
- device: The device (e.g., 'cuda', 'xpu', 'cpu') where models will be loaded.
|
298
298
|
- dtype: Data type (e.g., torch.float32) for model inference.
|
299
299
|
|
300
300
|
Returns:
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 ConsisID Authors and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -540,7 +540,7 @@ class ConsisIDPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
540
540
|
def prepare_extra_step_kwargs(self, generator, eta):
|
541
541
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
542
542
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
543
|
-
# eta corresponds to η in DDIM paper: https://
|
543
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
544
544
|
# and should be between [0, 1]
|
545
545
|
|
546
546
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -715,11 +715,11 @@ class ConsisIDPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
715
715
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
716
716
|
expense of slower inference.
|
717
717
|
guidance_scale (`float`, *optional*, defaults to 6):
|
718
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
719
|
-
`guidance_scale` is defined as `w` of equation 2.
|
720
|
-
Paper](https://
|
721
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
722
|
-
usually at the expense of lower image quality.
|
718
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
719
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
720
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
721
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
722
|
+
the text `prompt`, usually at the expense of lower image quality.
|
723
723
|
use_dynamic_cfg (`bool`, *optional*, defaults to `False`):
|
724
724
|
If True, dynamically adjusts the guidance scale during inference. This allows the model to use a
|
725
725
|
progressive guidance scale, improving the balance between text-guided generation and image quality over
|
@@ -821,7 +821,7 @@ class ConsisIDPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|
821
821
|
device = self._execution_device
|
822
822
|
|
823
823
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
824
|
-
# of the Imagen paper: https://
|
824
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
825
825
|
# corresponds to doing no classifier free guidance.
|
826
826
|
do_classifier_free_guidance = guidance_scale > 1.0
|
827
827
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -37,7 +37,7 @@ from ...utils import (
|
|
37
37
|
scale_lora_layers,
|
38
38
|
unscale_lora_layers,
|
39
39
|
)
|
40
|
-
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
|
40
|
+
from ...utils.torch_utils import empty_device_cache, is_compiled_module, is_torch_version, randn_tensor
|
41
41
|
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
42
42
|
from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
|
43
43
|
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
@@ -579,7 +579,7 @@ class StableDiffusionControlNetPipeline(
|
|
579
579
|
def prepare_extra_step_kwargs(self, generator, eta):
|
580
580
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
581
581
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
582
|
-
# eta corresponds to η in DDIM paper: https://
|
582
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
583
583
|
# and should be between [0, 1]
|
584
584
|
|
585
585
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -886,7 +886,7 @@ class StableDiffusionControlNetPipeline(
|
|
886
886
|
return self._clip_skip
|
887
887
|
|
888
888
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
889
|
-
# of the Imagen paper: https://
|
889
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
890
890
|
# corresponds to doing no classifier free guidance.
|
891
891
|
@property
|
892
892
|
def do_classifier_free_guidance(self):
|
@@ -979,8 +979,8 @@ class StableDiffusionControlNetPipeline(
|
|
979
979
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
980
980
|
The number of images to generate per prompt.
|
981
981
|
eta (`float`, *optional*, defaults to 0.0):
|
982
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
983
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
982
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
983
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
984
984
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
985
985
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
986
986
|
generation deterministic.
|
@@ -1339,7 +1339,7 @@ class StableDiffusionControlNetPipeline(
|
|
1339
1339
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
1340
1340
|
self.unet.to("cpu")
|
1341
1341
|
self.controlnet.to("cpu")
|
1342
|
-
|
1342
|
+
empty_device_cache()
|
1343
1343
|
|
1344
1344
|
if not output_type == "latent":
|
1345
1345
|
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
|
@@ -1,5 +1,5 @@
|
|
1
|
-
# Copyright
|
2
|
-
# Copyright
|
1
|
+
# Copyright 2025 Salesforce.com, inc.
|
2
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
5
|
# you may not use this file except in compliance with the License.
|
@@ -149,7 +149,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
|
149
149
|
def get_query_embeddings(self, input_image, src_subject):
|
150
150
|
return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False)
|
151
151
|
|
152
|
-
# from the original Blip Diffusion code,
|
152
|
+
# from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it
|
153
153
|
def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20):
|
154
154
|
rv = []
|
155
155
|
for prompt, tgt_subject in zip(prompts, tgt_subjects):
|
@@ -280,11 +280,11 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
|
280
280
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
281
281
|
tensor will be generated by random sampling.
|
282
282
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
283
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
284
|
-
`guidance_scale` is defined as `w` of equation 2.
|
285
|
-
Paper](https://
|
286
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
287
|
-
usually at the expense of lower image quality.
|
283
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
284
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
285
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
286
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
287
|
+
the text `prompt`, usually at the expense of lower image quality.
|
288
288
|
height (`int`, *optional*, defaults to 512):
|
289
289
|
The height of the generated image.
|
290
290
|
width (`int`, *optional*, defaults to 512):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -36,7 +36,7 @@ from ...utils import (
|
|
36
36
|
scale_lora_layers,
|
37
37
|
unscale_lora_layers,
|
38
38
|
)
|
39
|
-
from ...utils.torch_utils import is_compiled_module, randn_tensor
|
39
|
+
from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
|
40
40
|
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
41
41
|
from ..stable_diffusion import StableDiffusionPipelineOutput
|
42
42
|
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
@@ -557,7 +557,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
|
557
557
|
def prepare_extra_step_kwargs(self, generator, eta):
|
558
558
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
559
559
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
560
|
-
# eta corresponds to η in DDIM paper: https://
|
560
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
561
561
|
# and should be between [0, 1]
|
562
562
|
|
563
563
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -884,7 +884,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
|
884
884
|
return self._clip_skip
|
885
885
|
|
886
886
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
887
|
-
# of the Imagen paper: https://
|
887
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
888
888
|
# corresponds to doing no classifier free guidance.
|
889
889
|
@property
|
890
890
|
def do_classifier_free_guidance(self):
|
@@ -977,8 +977,8 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
|
977
977
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
978
978
|
The number of images to generate per prompt.
|
979
979
|
eta (`float`, *optional*, defaults to 0.0):
|
980
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
981
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
980
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
981
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
982
982
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
983
983
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
984
984
|
generation deterministic.
|
@@ -1311,7 +1311,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
|
1311
1311
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
1312
1312
|
self.unet.to("cpu")
|
1313
1313
|
self.controlnet.to("cpu")
|
1314
|
-
|
1314
|
+
empty_device_cache()
|
1315
1315
|
|
1316
1316
|
if not output_type == "latent":
|
1317
1317
|
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -38,7 +38,7 @@ from ...utils import (
|
|
38
38
|
scale_lora_layers,
|
39
39
|
unscale_lora_layers,
|
40
40
|
)
|
41
|
-
from ...utils.torch_utils import is_compiled_module, randn_tensor
|
41
|
+
from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
|
42
42
|
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
43
43
|
from ..stable_diffusion import StableDiffusionPipelineOutput
|
44
44
|
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
@@ -566,7 +566,7 @@ class StableDiffusionControlNetInpaintPipeline(
|
|
566
566
|
def prepare_extra_step_kwargs(self, generator, eta):
|
567
567
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
568
568
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
569
|
-
# eta corresponds to η in DDIM paper: https://
|
569
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
570
570
|
# and should be between [0, 1]
|
571
571
|
|
572
572
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -976,7 +976,7 @@ class StableDiffusionControlNetInpaintPipeline(
|
|
976
976
|
return self._clip_skip
|
977
977
|
|
978
978
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
979
|
-
# of the Imagen paper: https://
|
979
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
980
980
|
# corresponds to doing no classifier free guidance.
|
981
981
|
@property
|
982
982
|
def do_classifier_free_guidance(self):
|
@@ -1089,8 +1089,8 @@ class StableDiffusionControlNetInpaintPipeline(
|
|
1089
1089
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
1090
1090
|
The number of images to generate per prompt.
|
1091
1091
|
eta (`float`, *optional*, defaults to 0.0):
|
1092
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
1093
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
1092
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
1093
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
1094
1094
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
1095
1095
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
1096
1096
|
generation deterministic.
|
@@ -1500,7 +1500,7 @@ class StableDiffusionControlNetInpaintPipeline(
|
|
1500
1500
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
1501
1501
|
self.unet.to("cpu")
|
1502
1502
|
self.controlnet.to("cpu")
|
1503
|
-
|
1503
|
+
empty_device_cache()
|
1504
1504
|
|
1505
1505
|
if not output_type == "latent":
|
1506
1506
|
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
|