diffusers 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +13 -10
- diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
- diffusers-0.34.0.dist-info/RECORD +639 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -179,7 +179,7 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
|
|
179
179
|
def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
|
180
180
|
self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
|
181
181
|
|
182
|
-
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
182
|
+
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
|
183
183
|
r"""
|
184
184
|
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
|
185
185
|
text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
|
@@ -242,20 +242,20 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
|
|
242
242
|
width (`int`, *optional*, defaults to 512):
|
243
243
|
The width in pixels of the generated image.
|
244
244
|
prior_guidance_scale (`float`, *optional*, defaults to 4.0):
|
245
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
246
|
-
`guidance_scale` is defined as `w` of equation 2.
|
247
|
-
Paper](https://
|
248
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
249
|
-
usually at the expense of lower image quality.
|
245
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
246
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
247
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
248
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
249
|
+
the text `prompt`, usually at the expense of lower image quality.
|
250
250
|
prior_num_inference_steps (`int`, *optional*, defaults to 100):
|
251
251
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
252
252
|
expense of slower inference.
|
253
253
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
254
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
255
|
-
`guidance_scale` is defined as `w` of equation 2.
|
256
|
-
Paper](https://
|
257
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
258
|
-
usually at the expense of lower image quality.
|
254
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
255
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
256
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
257
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
258
|
+
the text `prompt`, usually at the expense of lower image quality.
|
259
259
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
260
260
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
261
261
|
to make generation deterministic.
|
@@ -407,7 +407,7 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
|
|
407
407
|
def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
|
408
408
|
self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
|
409
409
|
|
410
|
-
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] =
|
410
|
+
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
|
411
411
|
r"""
|
412
412
|
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
|
413
413
|
to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
|
@@ -417,7 +417,7 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
|
|
417
417
|
self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
|
418
418
|
self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
|
419
419
|
|
420
|
-
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] =
|
420
|
+
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
|
421
421
|
r"""
|
422
422
|
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
|
423
423
|
text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
|
@@ -479,11 +479,11 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
|
|
479
479
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
480
480
|
The number of images to generate per prompt.
|
481
481
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
482
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
483
|
-
`guidance_scale` is defined as `w` of equation 2.
|
484
|
-
Paper](https://
|
485
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
486
|
-
usually at the expense of lower image quality.
|
482
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
483
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
484
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
485
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
486
|
+
the text `prompt`, usually at the expense of lower image quality.
|
487
487
|
strength (`float`, *optional*, defaults to 0.3):
|
488
488
|
Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
|
489
489
|
will be used as a starting point, adding more noise to it the larger the `strength`. The number of
|
@@ -498,11 +498,11 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
|
|
498
498
|
width (`int`, *optional*, defaults to 512):
|
499
499
|
The width in pixels of the generated image.
|
500
500
|
prior_guidance_scale (`float`, *optional*, defaults to 4.0):
|
501
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
502
|
-
`guidance_scale` is defined as `w` of equation 2.
|
503
|
-
Paper](https://
|
504
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
505
|
-
usually at the expense of lower image quality.
|
501
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
502
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
503
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
504
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
505
|
+
the text `prompt`, usually at the expense of lower image quality.
|
506
506
|
prior_num_inference_steps (`int`, *optional*, defaults to 100):
|
507
507
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
508
508
|
expense of slower inference.
|
@@ -656,7 +656,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
|
|
656
656
|
def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
|
657
657
|
self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
|
658
658
|
|
659
|
-
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] =
|
659
|
+
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
|
660
660
|
r"""
|
661
661
|
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
|
662
662
|
text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
|
@@ -722,11 +722,11 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
|
|
722
722
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
723
723
|
The number of images to generate per prompt.
|
724
724
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
725
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
726
|
-
`guidance_scale` is defined as `w` of equation 2.
|
727
|
-
Paper](https://
|
728
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
729
|
-
usually at the expense of lower image quality.
|
725
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
726
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
727
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
728
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
729
|
+
the text `prompt`, usually at the expense of lower image quality.
|
730
730
|
num_inference_steps (`int`, *optional*, defaults to 100):
|
731
731
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
732
732
|
expense of slower inference.
|
@@ -735,11 +735,11 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
|
|
735
735
|
width (`int`, *optional*, defaults to 512):
|
736
736
|
The width in pixels of the generated image.
|
737
737
|
prior_guidance_scale (`float`, *optional*, defaults to 4.0):
|
738
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
739
|
-
`guidance_scale` is defined as `w` of equation 2.
|
740
|
-
Paper](https://
|
741
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
742
|
-
usually at the expense of lower image quality.
|
738
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
739
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
740
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
741
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
742
|
+
the text `prompt`, usually at the expense of lower image quality.
|
743
743
|
prior_num_inference_steps (`int`, *optional*, defaults to 100):
|
744
744
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
745
745
|
expense of slower inference.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -198,11 +198,11 @@ class KandinskyV22ControlnetPipeline(DiffusionPipeline):
|
|
198
198
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
199
199
|
expense of slower inference.
|
200
200
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
201
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
202
|
-
`guidance_scale` is defined as `w` of equation 2.
|
203
|
-
Paper](https://
|
204
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
205
|
-
usually at the expense of lower image quality.
|
201
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
202
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
203
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
204
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
205
|
+
the text `prompt`, usually at the expense of lower image quality.
|
206
206
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
207
207
|
The number of images to generate per prompt.
|
208
208
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -14,11 +14,10 @@
|
|
14
14
|
|
15
15
|
from typing import Callable, List, Optional, Union
|
16
16
|
|
17
|
-
import numpy as np
|
18
17
|
import PIL.Image
|
19
18
|
import torch
|
20
|
-
from PIL import Image
|
21
19
|
|
20
|
+
from ...image_processor import VaeImageProcessor
|
22
21
|
from ...models import UNet2DConditionModel, VQModel
|
23
22
|
from ...schedulers import DDPMScheduler
|
24
23
|
from ...utils import (
|
@@ -105,27 +104,6 @@ EXAMPLE_DOC_STRING = """
|
|
105
104
|
"""
|
106
105
|
|
107
106
|
|
108
|
-
# Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.downscale_height_and_width
|
109
|
-
def downscale_height_and_width(height, width, scale_factor=8):
|
110
|
-
new_height = height // scale_factor**2
|
111
|
-
if height % scale_factor**2 != 0:
|
112
|
-
new_height += 1
|
113
|
-
new_width = width // scale_factor**2
|
114
|
-
if width % scale_factor**2 != 0:
|
115
|
-
new_width += 1
|
116
|
-
return new_height * scale_factor, new_width * scale_factor
|
117
|
-
|
118
|
-
|
119
|
-
# Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_img2img.prepare_image
|
120
|
-
def prepare_image(pil_image, w=512, h=512):
|
121
|
-
pil_image = pil_image.resize((w, h), resample=Image.BICUBIC, reducing_gap=1)
|
122
|
-
arr = np.array(pil_image.convert("RGB"))
|
123
|
-
arr = arr.astype(np.float32) / 127.5 - 1
|
124
|
-
arr = np.transpose(arr, [2, 0, 1])
|
125
|
-
image = torch.from_numpy(arr).unsqueeze(0)
|
126
|
-
return image
|
127
|
-
|
128
|
-
|
129
107
|
class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
|
130
108
|
"""
|
131
109
|
Pipeline for image-to-image generation using Kandinsky
|
@@ -157,7 +135,14 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
|
|
157
135
|
scheduler=scheduler,
|
158
136
|
movq=movq,
|
159
137
|
)
|
160
|
-
|
138
|
+
movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1) if getattr(self, "movq", None) else 8
|
139
|
+
movq_latent_channels = self.movq.config.latent_channels if getattr(self, "movq", None) else 4
|
140
|
+
self.image_processor = VaeImageProcessor(
|
141
|
+
vae_scale_factor=movq_scale_factor,
|
142
|
+
vae_latent_channels=movq_latent_channels,
|
143
|
+
resample="bicubic",
|
144
|
+
reducing_gap=1,
|
145
|
+
)
|
161
146
|
|
162
147
|
# Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_img2img.KandinskyImg2ImgPipeline.get_timesteps
|
163
148
|
def get_timesteps(self, num_inference_steps, strength, device):
|
@@ -259,11 +244,11 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
|
|
259
244
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
260
245
|
expense of slower inference.
|
261
246
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
262
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
263
|
-
`guidance_scale` is defined as `w` of equation 2.
|
264
|
-
Paper](https://
|
265
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
266
|
-
usually at the expense of lower image quality.
|
247
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
248
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
249
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
250
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
251
|
+
the text `prompt`, usually at the expense of lower image quality.
|
267
252
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
268
253
|
The number of images to generate per prompt.
|
269
254
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -316,7 +301,7 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
|
|
316
301
|
f"Input is in incorrect format: {[type(i) for i in image]}. Currently, we only support PIL image and pytorch tensor"
|
317
302
|
)
|
318
303
|
|
319
|
-
image = torch.cat([
|
304
|
+
image = torch.cat([self.image_processor.preprocess(i, width, height) for i in image], dim=0)
|
320
305
|
image = image.to(dtype=image_embeds.dtype, device=device)
|
321
306
|
|
322
307
|
latents = self.movq.encode(image)["latents"]
|
@@ -324,7 +309,6 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
|
|
324
309
|
self.scheduler.set_timesteps(num_inference_steps, device=device)
|
325
310
|
timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
|
326
311
|
latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
|
327
|
-
height, width = downscale_height_and_width(height, width, self.movq_scale_factor)
|
328
312
|
latents = self.prepare_latents(
|
329
313
|
latents, latent_timestep, batch_size, num_images_per_prompt, image_embeds.dtype, device, generator
|
330
314
|
)
|
@@ -379,13 +363,7 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
|
|
379
363
|
if output_type not in ["pt", "np", "pil"]:
|
380
364
|
raise ValueError(f"Only the output types `pt`, `pil` and `np` are supported not output_type={output_type}")
|
381
365
|
|
382
|
-
|
383
|
-
image = image * 0.5 + 0.5
|
384
|
-
image = image.clamp(0, 1)
|
385
|
-
image = image.cpu().permute(0, 2, 3, 1).float().numpy()
|
386
|
-
|
387
|
-
if output_type == "pil":
|
388
|
-
image = self.numpy_to_pil(image)
|
366
|
+
image = self.image_processor.postprocess(image, output_type)
|
389
367
|
|
390
368
|
if not return_dict:
|
391
369
|
return (image,)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -14,11 +14,10 @@
|
|
14
14
|
|
15
15
|
from typing import Callable, Dict, List, Optional, Union
|
16
16
|
|
17
|
-
import numpy as np
|
18
17
|
import PIL.Image
|
19
18
|
import torch
|
20
|
-
from PIL import Image
|
21
19
|
|
20
|
+
from ...image_processor import VaeImageProcessor
|
22
21
|
from ...models import UNet2DConditionModel, VQModel
|
23
22
|
from ...schedulers import DDPMScheduler
|
24
23
|
from ...utils import deprecate, is_torch_xla_available, logging
|
@@ -76,27 +75,6 @@ EXAMPLE_DOC_STRING = """
|
|
76
75
|
"""
|
77
76
|
|
78
77
|
|
79
|
-
# Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.downscale_height_and_width
|
80
|
-
def downscale_height_and_width(height, width, scale_factor=8):
|
81
|
-
new_height = height // scale_factor**2
|
82
|
-
if height % scale_factor**2 != 0:
|
83
|
-
new_height += 1
|
84
|
-
new_width = width // scale_factor**2
|
85
|
-
if width % scale_factor**2 != 0:
|
86
|
-
new_width += 1
|
87
|
-
return new_height * scale_factor, new_width * scale_factor
|
88
|
-
|
89
|
-
|
90
|
-
# Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_img2img.prepare_image
|
91
|
-
def prepare_image(pil_image, w=512, h=512):
|
92
|
-
pil_image = pil_image.resize((w, h), resample=Image.BICUBIC, reducing_gap=1)
|
93
|
-
arr = np.array(pil_image.convert("RGB"))
|
94
|
-
arr = arr.astype(np.float32) / 127.5 - 1
|
95
|
-
arr = np.transpose(arr, [2, 0, 1])
|
96
|
-
image = torch.from_numpy(arr).unsqueeze(0)
|
97
|
-
return image
|
98
|
-
|
99
|
-
|
100
78
|
class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
|
101
79
|
"""
|
102
80
|
Pipeline for image-to-image generation using Kandinsky
|
@@ -129,7 +107,14 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
|
|
129
107
|
scheduler=scheduler,
|
130
108
|
movq=movq,
|
131
109
|
)
|
132
|
-
|
110
|
+
movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1) if getattr(self, "movq", None) else 8
|
111
|
+
movq_latent_channels = self.movq.config.latent_channels if getattr(self, "movq", None) else 4
|
112
|
+
self.image_processor = VaeImageProcessor(
|
113
|
+
vae_scale_factor=movq_scale_factor,
|
114
|
+
vae_latent_channels=movq_latent_channels,
|
115
|
+
resample="bicubic",
|
116
|
+
reducing_gap=1,
|
117
|
+
)
|
133
118
|
|
134
119
|
# Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_img2img.KandinskyImg2ImgPipeline.get_timesteps
|
135
120
|
def get_timesteps(self, num_inference_steps, strength, device):
|
@@ -240,11 +225,11 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
|
|
240
225
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
241
226
|
expense of slower inference.
|
242
227
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
243
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
244
|
-
`guidance_scale` is defined as `w` of equation 2.
|
245
|
-
Paper](https://
|
246
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
247
|
-
usually at the expense of lower image quality.
|
228
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
229
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
230
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
231
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
232
|
+
the text `prompt`, usually at the expense of lower image quality.
|
248
233
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
249
234
|
The number of images to generate per prompt.
|
250
235
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -319,7 +304,7 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
|
|
319
304
|
f"Input is in incorrect format: {[type(i) for i in image]}. Currently, we only support PIL image and pytorch tensor"
|
320
305
|
)
|
321
306
|
|
322
|
-
image = torch.cat([
|
307
|
+
image = torch.cat([self.image_processor.preprocess(i, width, height) for i in image], dim=0)
|
323
308
|
image = image.to(dtype=image_embeds.dtype, device=device)
|
324
309
|
|
325
310
|
latents = self.movq.encode(image)["latents"]
|
@@ -327,7 +312,6 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
|
|
327
312
|
self.scheduler.set_timesteps(num_inference_steps, device=device)
|
328
313
|
timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
|
329
314
|
latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
|
330
|
-
height, width = downscale_height_and_width(height, width, self.movq_scale_factor)
|
331
315
|
latents = self.prepare_latents(
|
332
316
|
latents, latent_timestep, batch_size, num_images_per_prompt, image_embeds.dtype, device, generator
|
333
317
|
)
|
@@ -383,21 +367,9 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
|
|
383
367
|
if XLA_AVAILABLE:
|
384
368
|
xm.mark_step()
|
385
369
|
|
386
|
-
if output_type not in ["pt", "np", "pil", "latent"]:
|
387
|
-
raise ValueError(
|
388
|
-
f"Only the output types `pt`, `pil` ,`np` and `latent` are supported not output_type={output_type}"
|
389
|
-
)
|
390
|
-
|
391
370
|
if not output_type == "latent":
|
392
|
-
# post-processing
|
393
371
|
image = self.movq.decode(latents, force_not_quantize=True)["sample"]
|
394
|
-
|
395
|
-
image = image * 0.5 + 0.5
|
396
|
-
image = image.clamp(0, 1)
|
397
|
-
image = image.cpu().permute(0, 2, 3, 1).float().numpy()
|
398
|
-
|
399
|
-
if output_type == "pil":
|
400
|
-
image = self.numpy_to_pil(image)
|
372
|
+
image = self.image_processor.postprocess(image, output_type)
|
401
373
|
else:
|
402
374
|
image = latents
|
403
375
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -343,11 +343,11 @@ class KandinskyV22InpaintPipeline(DiffusionPipeline):
|
|
343
343
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
344
344
|
expense of slower inference.
|
345
345
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
346
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
347
|
-
`guidance_scale` is defined as `w` of equation 2.
|
348
|
-
Paper](https://
|
349
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
350
|
-
usually at the expense of lower image quality.
|
346
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
347
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
348
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
349
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
350
|
+
the text `prompt`, usually at the expense of lower image quality.
|
351
351
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
352
352
|
The number of images to generate per prompt.
|
353
353
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -386,7 +386,7 @@ class KandinskyV22InpaintPipeline(DiffusionPipeline):
|
|
386
386
|
"As of diffusers==0.19.0 this behavior has been inverted. Now white pixels are repainted and black pixels are preserved. "
|
387
387
|
"This way, Kandinsky's masking behavior is aligned with Stable Diffusion. "
|
388
388
|
"THIS means that you HAVE to invert the input mask to have the same behavior as before as explained in https://github.com/huggingface/diffusers/pull/4207. "
|
389
|
-
"This warning will be
|
389
|
+
"This warning will be suppressed after the first inference call and will be removed in diffusers>0.23.0"
|
390
390
|
)
|
391
391
|
self._warn_has_been_called = True
|
392
392
|
|
@@ -179,11 +179,11 @@ class KandinskyV22PriorPipeline(DiffusionPipeline):
|
|
179
179
|
The prompt not to guide the image generation. Ignored when not using guidance (i.e., ignored if
|
180
180
|
`guidance_scale` is less than `1`).
|
181
181
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
182
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
183
|
-
`guidance_scale` is defined as `w` of equation 2.
|
184
|
-
Paper](https://
|
185
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
186
|
-
usually at the expense of lower image quality.
|
182
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
183
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
184
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
185
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
186
|
+
the text `prompt`, usually at the expense of lower image quality.
|
187
187
|
|
188
188
|
Examples:
|
189
189
|
|
@@ -414,11 +414,11 @@ class KandinskyV22PriorPipeline(DiffusionPipeline):
|
|
414
414
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
415
415
|
tensor will ge generated by sampling using the supplied random `generator`.
|
416
416
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
417
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
418
|
-
`guidance_scale` is defined as `w` of equation 2.
|
419
|
-
Paper](https://
|
420
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
421
|
-
usually at the expense of lower image quality.
|
417
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
418
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
419
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
420
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
421
|
+
the text `prompt`, usually at the expense of lower image quality.
|
422
422
|
output_type (`str`, *optional*, defaults to `"pt"`):
|
423
423
|
The output format of the generate image. Choose between: `"np"` (`np.array`) or `"pt"`
|
424
424
|
(`torch.Tensor`).
|
@@ -203,11 +203,11 @@ class KandinskyV22PriorEmb2EmbPipeline(DiffusionPipeline):
|
|
203
203
|
The prompt not to guide the image generation. Ignored when not using guidance (i.e., ignored if
|
204
204
|
`guidance_scale` is less than `1`).
|
205
205
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
206
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
207
|
-
`guidance_scale` is defined as `w` of equation 2.
|
208
|
-
Paper](https://
|
209
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
210
|
-
usually at the expense of lower image quality.
|
206
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
207
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
208
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
209
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
210
|
+
the text `prompt`, usually at the expense of lower image quality.
|
211
211
|
|
212
212
|
Examples:
|
213
213
|
|
@@ -441,11 +441,11 @@ class KandinskyV22PriorEmb2EmbPipeline(DiffusionPipeline):
|
|
441
441
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
442
442
|
to make generation deterministic.
|
443
443
|
guidance_scale (`float`, *optional*, defaults to 4.0):
|
444
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
445
|
-
`guidance_scale` is defined as `w` of equation 2.
|
446
|
-
Paper](https://
|
447
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
448
|
-
usually at the expense of lower image quality.
|
444
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
445
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
446
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
447
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
448
|
+
the text `prompt`, usually at the expense of lower image quality.
|
449
449
|
output_type (`str`, *optional*, defaults to `"pt"`):
|
450
450
|
The output format of the generate image. Choose between: `"np"` (`np.array`) or `"pt"`
|
451
451
|
(`torch.Tensor`).
|
@@ -368,11 +368,11 @@ class Kandinsky3Pipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
|
|
368
368
|
Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
|
369
369
|
timesteps are used. Must be in descending order.
|
370
370
|
guidance_scale (`float`, *optional*, defaults to 3.0):
|
371
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
372
|
-
`guidance_scale` is defined as `w` of equation 2.
|
373
|
-
Paper](https://
|
374
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
375
|
-
usually at the expense of lower image quality.
|
371
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
372
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
373
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
374
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
375
|
+
the text `prompt`, usually at the expense of lower image quality.
|
376
376
|
negative_prompt (`str` or `List[str]`, *optional*):
|
377
377
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
378
378
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -384,8 +384,8 @@ class Kandinsky3Pipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
|
|
384
384
|
width (`int`, *optional*, defaults to self.unet.config.sample_size):
|
385
385
|
The width in pixels of the generated image.
|
386
386
|
eta (`float`, *optional*, defaults to 0.0):
|
387
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
388
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
387
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
388
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
389
389
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
390
390
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
391
391
|
to make generation deterministic.
|