diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +198 -28
- diffusers/loaders/lora_conversion_utils.py +679 -44
- diffusers/loaders/lora_pipeline.py +1963 -801
- diffusers/loaders/peft.py +169 -84
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +653 -75
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +22 -32
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +593 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +10 -2
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +14 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
- diffusers-0.33.0.dist-info/RECORD +608 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
- diffusers-0.32.1.dist-info/RECORD +0 -550
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -77,7 +77,7 @@ def calculate_shift(
|
|
77
77
|
base_seq_len: int = 256,
|
78
78
|
max_seq_len: int = 4096,
|
79
79
|
base_shift: float = 0.5,
|
80
|
-
max_shift: float = 1.
|
80
|
+
max_shift: float = 1.15,
|
81
81
|
):
|
82
82
|
m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
|
83
83
|
b = base_shift - m * base_seq_len
|
@@ -205,16 +205,22 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
|
|
205
205
|
scheduler=scheduler,
|
206
206
|
)
|
207
207
|
|
208
|
-
self.vae_spatial_compression_ratio =
|
209
|
-
|
210
|
-
|
208
|
+
self.vae_spatial_compression_ratio = (
|
209
|
+
self.vae.spatial_compression_ratio if getattr(self, "vae", None) is not None else 32
|
210
|
+
)
|
211
|
+
self.vae_temporal_compression_ratio = (
|
212
|
+
self.vae.temporal_compression_ratio if getattr(self, "vae", None) is not None else 8
|
213
|
+
)
|
214
|
+
self.transformer_spatial_patch_size = (
|
215
|
+
self.transformer.config.patch_size if getattr(self, "transformer", None) is not None else 1
|
216
|
+
)
|
211
217
|
self.transformer_temporal_patch_size = (
|
212
|
-
self.transformer.config.patch_size_t if
|
218
|
+
self.transformer.config.patch_size_t if getattr(self, "transformer") is not None else 1
|
213
219
|
)
|
214
220
|
|
215
221
|
self.video_processor = VideoProcessor(vae_scale_factor=self.vae_spatial_compression_ratio)
|
216
222
|
self.tokenizer_max_length = (
|
217
|
-
self.tokenizer.model_max_length if
|
223
|
+
self.tokenizer.model_max_length if getattr(self, "tokenizer", None) is not None else 128
|
218
224
|
)
|
219
225
|
|
220
226
|
self.default_height = 512
|
@@ -481,19 +487,21 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
|
|
481
487
|
) -> torch.Tensor:
|
482
488
|
height = height // self.vae_spatial_compression_ratio
|
483
489
|
width = width // self.vae_spatial_compression_ratio
|
484
|
-
num_frames = (
|
485
|
-
(num_frames - 1) // self.vae_temporal_compression_ratio + 1 if latents is None else latents.size(2)
|
486
|
-
)
|
490
|
+
num_frames = (num_frames - 1) // self.vae_temporal_compression_ratio + 1
|
487
491
|
|
488
492
|
shape = (batch_size, num_channels_latents, num_frames, height, width)
|
489
493
|
mask_shape = (batch_size, 1, num_frames, height, width)
|
490
494
|
|
491
495
|
if latents is not None:
|
492
|
-
conditioning_mask = latents.new_zeros(
|
496
|
+
conditioning_mask = latents.new_zeros(mask_shape)
|
493
497
|
conditioning_mask[:, :, 0] = 1.0
|
494
498
|
conditioning_mask = self._pack_latents(
|
495
499
|
conditioning_mask, self.transformer_spatial_patch_size, self.transformer_temporal_patch_size
|
496
|
-
)
|
500
|
+
).squeeze(-1)
|
501
|
+
if latents.ndim != 3 or latents.shape[:2] != conditioning_mask.shape:
|
502
|
+
raise ValueError(
|
503
|
+
f"Provided `latents` tensor has shape {latents.shape}, but the expected shape is {conditioning_mask.shape + (num_channels_latents,)}."
|
504
|
+
)
|
497
505
|
return latents.to(device=device, dtype=dtype), conditioning_mask
|
498
506
|
|
499
507
|
if isinstance(generator, list):
|
@@ -542,6 +550,10 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
|
|
542
550
|
def num_timesteps(self):
|
543
551
|
return self._num_timesteps
|
544
552
|
|
553
|
+
@property
|
554
|
+
def current_timestep(self):
|
555
|
+
return self._current_timestep
|
556
|
+
|
545
557
|
@property
|
546
558
|
def attention_kwargs(self):
|
547
559
|
return self._attention_kwargs
|
@@ -678,6 +690,7 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
|
|
678
690
|
self._guidance_scale = guidance_scale
|
679
691
|
self._attention_kwargs = attention_kwargs
|
680
692
|
self._interrupt = False
|
693
|
+
self._current_timestep = None
|
681
694
|
|
682
695
|
# 2. Define call parameters
|
683
696
|
if prompt is not None and isinstance(prompt, str):
|
@@ -741,10 +754,10 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
|
|
741
754
|
sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
|
742
755
|
mu = calculate_shift(
|
743
756
|
video_sequence_length,
|
744
|
-
self.scheduler.config.base_image_seq_len,
|
745
|
-
self.scheduler.config.max_image_seq_len,
|
746
|
-
self.scheduler.config.base_shift,
|
747
|
-
self.scheduler.config.max_shift,
|
757
|
+
self.scheduler.config.get("base_image_seq_len", 256),
|
758
|
+
self.scheduler.config.get("max_image_seq_len", 4096),
|
759
|
+
self.scheduler.config.get("base_shift", 0.5),
|
760
|
+
self.scheduler.config.get("max_shift", 1.15),
|
748
761
|
)
|
749
762
|
timesteps, num_inference_steps = retrieve_timesteps(
|
750
763
|
self.scheduler,
|
@@ -758,9 +771,8 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
|
|
758
771
|
self._num_timesteps = len(timesteps)
|
759
772
|
|
760
773
|
# 6. Prepare micro-conditions
|
761
|
-
latent_frame_rate = frame_rate / self.vae_temporal_compression_ratio
|
762
774
|
rope_interpolation_scale = (
|
763
|
-
|
775
|
+
self.vae_temporal_compression_ratio / frame_rate,
|
764
776
|
self.vae_spatial_compression_ratio,
|
765
777
|
self.vae_spatial_compression_ratio,
|
766
778
|
)
|
@@ -771,6 +783,8 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
|
|
771
783
|
if self.interrupt:
|
772
784
|
continue
|
773
785
|
|
786
|
+
self._current_timestep = t
|
787
|
+
|
774
788
|
latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
|
775
789
|
latent_model_input = latent_model_input.to(prompt_embeds.dtype)
|
776
790
|
|
@@ -22,7 +22,7 @@ except OptionalDependencyNotAvailable:
|
|
22
22
|
|
23
23
|
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
24
24
|
else:
|
25
|
-
_import_structure["pipeline_lumina"] = ["LuminaText2ImgPipeline"]
|
25
|
+
_import_structure["pipeline_lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
|
26
26
|
|
27
27
|
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
28
28
|
try:
|
@@ -32,7 +32,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|
32
32
|
except OptionalDependencyNotAvailable:
|
33
33
|
from ...utils.dummy_torch_and_transformers_objects import *
|
34
34
|
else:
|
35
|
-
from .pipeline_lumina import LuminaText2ImgPipeline
|
35
|
+
from .pipeline_lumina import LuminaPipeline, LuminaText2ImgPipeline
|
36
36
|
|
37
37
|
else:
|
38
38
|
import sys
|
@@ -17,11 +17,12 @@ import inspect
|
|
17
17
|
import math
|
18
18
|
import re
|
19
19
|
import urllib.parse as ul
|
20
|
-
from typing import List, Optional, Tuple, Union
|
20
|
+
from typing import Callable, Dict, List, Optional, Tuple, Union
|
21
21
|
|
22
22
|
import torch
|
23
|
-
from transformers import
|
23
|
+
from transformers import GemmaPreTrainedModel, GemmaTokenizer, GemmaTokenizerFast
|
24
24
|
|
25
|
+
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
25
26
|
from ...image_processor import VaeImageProcessor
|
26
27
|
from ...models import AutoencoderKL
|
27
28
|
from ...models.embeddings import get_2d_rotary_pos_embed_lumina
|
@@ -29,8 +30,10 @@ from ...models.transformers.lumina_nextdit2d import LuminaNextDiT2DModel
|
|
29
30
|
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
30
31
|
from ...utils import (
|
31
32
|
BACKENDS_MAPPING,
|
33
|
+
deprecate,
|
32
34
|
is_bs4_available,
|
33
35
|
is_ftfy_available,
|
36
|
+
is_torch_xla_available,
|
34
37
|
logging,
|
35
38
|
replace_example_docstring,
|
36
39
|
)
|
@@ -38,8 +41,16 @@ from ...utils.torch_utils import randn_tensor
|
|
38
41
|
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
39
42
|
|
40
43
|
|
44
|
+
if is_torch_xla_available():
|
45
|
+
import torch_xla.core.xla_model as xm
|
46
|
+
|
47
|
+
XLA_AVAILABLE = True
|
48
|
+
else:
|
49
|
+
XLA_AVAILABLE = False
|
50
|
+
|
41
51
|
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
42
52
|
|
53
|
+
|
43
54
|
if is_bs4_available():
|
44
55
|
from bs4 import BeautifulSoup
|
45
56
|
|
@@ -50,11 +61,9 @@ EXAMPLE_DOC_STRING = """
|
|
50
61
|
Examples:
|
51
62
|
```py
|
52
63
|
>>> import torch
|
53
|
-
>>> from diffusers import
|
64
|
+
>>> from diffusers import LuminaPipeline
|
54
65
|
|
55
|
-
>>> pipe =
|
56
|
-
... "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
|
57
|
-
... )
|
66
|
+
>>> pipe = LuminaPipeline.from_pretrained("Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16)
|
58
67
|
>>> # Enable memory optimizations.
|
59
68
|
>>> pipe.enable_model_cpu_offload()
|
60
69
|
|
@@ -124,7 +133,7 @@ def retrieve_timesteps(
|
|
124
133
|
return timesteps, num_inference_steps
|
125
134
|
|
126
135
|
|
127
|
-
class
|
136
|
+
class LuminaPipeline(DiffusionPipeline):
|
128
137
|
r"""
|
129
138
|
Pipeline for text-to-image generation using Lumina-T2I.
|
130
139
|
|
@@ -134,13 +143,10 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
134
143
|
Args:
|
135
144
|
vae ([`AutoencoderKL`]):
|
136
145
|
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
|
137
|
-
text_encoder ([`
|
138
|
-
Frozen text-encoder.
|
139
|
-
|
140
|
-
|
141
|
-
tokenizer (`AutoModel`):
|
142
|
-
Tokenizer of class
|
143
|
-
[AutoModel](https://huggingface.co/docs/transformers/model_doc/t5#transformers.AutoModel).
|
146
|
+
text_encoder ([`GemmaPreTrainedModel`]):
|
147
|
+
Frozen Gemma text-encoder.
|
148
|
+
tokenizer (`GemmaTokenizer` or `GemmaTokenizerFast`):
|
149
|
+
Gemma tokenizer.
|
144
150
|
transformer ([`Transformer2DModel`]):
|
145
151
|
A text conditioned `Transformer2DModel` to denoise the encoded image latents.
|
146
152
|
scheduler ([`SchedulerMixin`]):
|
@@ -165,14 +171,18 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
165
171
|
|
166
172
|
_optional_components = []
|
167
173
|
model_cpu_offload_seq = "text_encoder->transformer->vae"
|
174
|
+
_callback_tensor_inputs = [
|
175
|
+
"latents",
|
176
|
+
"prompt_embeds",
|
177
|
+
]
|
168
178
|
|
169
179
|
def __init__(
|
170
180
|
self,
|
171
181
|
transformer: LuminaNextDiT2DModel,
|
172
182
|
scheduler: FlowMatchEulerDiscreteScheduler,
|
173
183
|
vae: AutoencoderKL,
|
174
|
-
text_encoder:
|
175
|
-
tokenizer:
|
184
|
+
text_encoder: GemmaPreTrainedModel,
|
185
|
+
tokenizer: Union[GemmaTokenizer, GemmaTokenizerFast],
|
176
186
|
):
|
177
187
|
super().__init__()
|
178
188
|
|
@@ -386,9 +396,19 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
386
396
|
negative_prompt_embeds=None,
|
387
397
|
prompt_attention_mask=None,
|
388
398
|
negative_prompt_attention_mask=None,
|
399
|
+
callback_on_step_end_tensor_inputs=None,
|
389
400
|
):
|
390
|
-
if height %
|
391
|
-
raise ValueError(
|
401
|
+
if height % (self.vae_scale_factor * 2) != 0 or width % (self.vae_scale_factor * 2) != 0:
|
402
|
+
raise ValueError(
|
403
|
+
f"`height` and `width` have to be divisible by {self.vae_scale_factor * 2} but are {height} and {width}."
|
404
|
+
)
|
405
|
+
|
406
|
+
if callback_on_step_end_tensor_inputs is not None and not all(
|
407
|
+
k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
|
408
|
+
):
|
409
|
+
raise ValueError(
|
410
|
+
f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
|
411
|
+
)
|
392
412
|
|
393
413
|
if prompt is not None and prompt_embeds is not None:
|
394
414
|
raise ValueError(
|
@@ -633,6 +653,10 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
633
653
|
max_sequence_length: int = 256,
|
634
654
|
scaling_watershed: Optional[float] = 1.0,
|
635
655
|
proportional_attn: Optional[bool] = True,
|
656
|
+
callback_on_step_end: Optional[
|
657
|
+
Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
|
658
|
+
] = None,
|
659
|
+
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
|
636
660
|
) -> Union[ImagePipelineOutput, Tuple]:
|
637
661
|
"""
|
638
662
|
Function invoked when calling the pipeline for generation.
|
@@ -724,7 +748,11 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
724
748
|
negative_prompt_embeds=negative_prompt_embeds,
|
725
749
|
prompt_attention_mask=prompt_attention_mask,
|
726
750
|
negative_prompt_attention_mask=negative_prompt_attention_mask,
|
751
|
+
callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
|
727
752
|
)
|
753
|
+
|
754
|
+
self._guidance_scale = guidance_scale
|
755
|
+
|
728
756
|
cross_attention_kwargs = {}
|
729
757
|
|
730
758
|
# 2. Define call parameters
|
@@ -786,6 +814,8 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
786
814
|
latents,
|
787
815
|
)
|
788
816
|
|
817
|
+
self._num_timesteps = len(timesteps)
|
818
|
+
|
789
819
|
# 6. Denoising loop
|
790
820
|
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
791
821
|
for i, t in enumerate(timesteps):
|
@@ -797,10 +827,11 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
797
827
|
# TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can
|
798
828
|
# This would be a good case for the `match` statement (Python 3.10+)
|
799
829
|
is_mps = latent_model_input.device.type == "mps"
|
830
|
+
is_npu = latent_model_input.device.type == "npu"
|
800
831
|
if isinstance(current_timestep, float):
|
801
|
-
dtype = torch.float32 if is_mps else torch.float64
|
832
|
+
dtype = torch.float32 if (is_mps or is_npu) else torch.float64
|
802
833
|
else:
|
803
|
-
dtype = torch.int32 if is_mps else torch.int64
|
834
|
+
dtype = torch.int32 if (is_mps or is_npu) else torch.int64
|
804
835
|
current_timestep = torch.tensor(
|
805
836
|
[current_timestep],
|
806
837
|
dtype=dtype,
|
@@ -874,6 +905,18 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
874
905
|
|
875
906
|
progress_bar.update()
|
876
907
|
|
908
|
+
if callback_on_step_end is not None:
|
909
|
+
callback_kwargs = {}
|
910
|
+
for k in callback_on_step_end_tensor_inputs:
|
911
|
+
callback_kwargs[k] = locals()[k]
|
912
|
+
callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
|
913
|
+
|
914
|
+
latents = callback_outputs.pop("latents", latents)
|
915
|
+
prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
|
916
|
+
|
917
|
+
if XLA_AVAILABLE:
|
918
|
+
xm.mark_step()
|
919
|
+
|
877
920
|
if not output_type == "latent":
|
878
921
|
latents = latents / self.vae.config.scaling_factor
|
879
922
|
image = self.vae.decode(latents, return_dict=False)[0]
|
@@ -888,3 +931,23 @@ class LuminaText2ImgPipeline(DiffusionPipeline):
|
|
888
931
|
return (image,)
|
889
932
|
|
890
933
|
return ImagePipelineOutput(images=image)
|
934
|
+
|
935
|
+
|
936
|
+
class LuminaText2ImgPipeline(LuminaPipeline):
|
937
|
+
def __init__(
|
938
|
+
self,
|
939
|
+
transformer: LuminaNextDiT2DModel,
|
940
|
+
scheduler: FlowMatchEulerDiscreteScheduler,
|
941
|
+
vae: AutoencoderKL,
|
942
|
+
text_encoder: GemmaPreTrainedModel,
|
943
|
+
tokenizer: Union[GemmaTokenizer, GemmaTokenizerFast],
|
944
|
+
):
|
945
|
+
deprecation_message = "`LuminaText2ImgPipeline` has been renamed to `LuminaPipeline` and will be removed in a future version. Please use `LuminaPipeline` instead."
|
946
|
+
deprecate("diffusers.pipelines.lumina.pipeline_lumina.LuminaText2ImgPipeline", "0.34", deprecation_message)
|
947
|
+
super().__init__(
|
948
|
+
transformer=transformer,
|
949
|
+
scheduler=scheduler,
|
950
|
+
vae=vae,
|
951
|
+
text_encoder=text_encoder,
|
952
|
+
tokenizer=tokenizer,
|
953
|
+
)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
from ...utils import (
|
4
|
+
DIFFUSERS_SLOW_IMPORT,
|
5
|
+
OptionalDependencyNotAvailable,
|
6
|
+
_LazyModule,
|
7
|
+
get_objects_from_module,
|
8
|
+
is_torch_available,
|
9
|
+
is_transformers_available,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
_dummy_objects = {}
|
14
|
+
_import_structure = {}
|
15
|
+
|
16
|
+
|
17
|
+
try:
|
18
|
+
if not (is_transformers_available() and is_torch_available()):
|
19
|
+
raise OptionalDependencyNotAvailable()
|
20
|
+
except OptionalDependencyNotAvailable:
|
21
|
+
from ...utils import dummy_torch_and_transformers_objects # noqa F403
|
22
|
+
|
23
|
+
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
24
|
+
else:
|
25
|
+
_import_structure["pipeline_lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
|
26
|
+
|
27
|
+
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
28
|
+
try:
|
29
|
+
if not (is_transformers_available() and is_torch_available()):
|
30
|
+
raise OptionalDependencyNotAvailable()
|
31
|
+
|
32
|
+
except OptionalDependencyNotAvailable:
|
33
|
+
from ...utils.dummy_torch_and_transformers_objects import *
|
34
|
+
else:
|
35
|
+
from .pipeline_lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
|
36
|
+
|
37
|
+
else:
|
38
|
+
import sys
|
39
|
+
|
40
|
+
sys.modules[__name__] = _LazyModule(
|
41
|
+
__name__,
|
42
|
+
globals()["__file__"],
|
43
|
+
_import_structure,
|
44
|
+
module_spec=__spec__,
|
45
|
+
)
|
46
|
+
|
47
|
+
for name, value in _dummy_objects.items():
|
48
|
+
setattr(sys.modules[__name__], name, value)
|