diffusers 0.27.2__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +19 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +20 -26
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +42 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
- diffusers/schedulers/scheduling_edm_euler.py +50 -31
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
- diffusers/schedulers/scheduling_euler_discrete.py +160 -68
- diffusers/schedulers/scheduling_heun_discrete.py +57 -39
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +24 -26
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/METADATA +47 -47
- diffusers-0.28.0.dist-info/RECORD +414 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/WHEEL +1 -1
- diffusers-0.27.2.dist-info/RECORD +0 -399
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/__init__.py
CHANGED
@@ -24,6 +24,7 @@ _import_structure = {
     "deprecated": [],
     "latent_diffusion": [],
     "ledits_pp": [],
+    "marigold": [],
     "stable_diffusion": [],
     "stable_diffusion_xl": [],
 }
@@ -114,6 +115,7 @@ else:
     _import_structure["amused"] = ["AmusedImg2ImgPipeline", "AmusedInpaintPipeline", "AmusedPipeline"]
     _import_structure["animatediff"] = [
         "AnimateDiffPipeline",
+        "AnimateDiffSDXLPipeline",
         "AnimateDiffVideoToVideoPipeline",
     ]
     _import_structure["audioldm"] = ["AudioLDMPipeline"]
@@ -134,6 +136,12 @@ else:
             "StableDiffusionXLControlNetPipeline",
         ]
     )
+    _import_structure["controlnet_xs"].extend(
+        [
+            "StableDiffusionControlNetXSPipeline",
+            "StableDiffusionXLControlNetXSPipeline",
+        ]
+    )
     _import_structure["deepfloyd_if"] = [
         "IFImg2ImgPipeline",
         "IFImg2ImgSuperResolutionPipeline",
@@ -178,10 +186,16 @@ else:
             "LEditsPPPipelineStableDiffusionXL",
         ]
     )
+    _import_structure["marigold"].extend(
+        [
+            "MarigoldDepthPipeline",
+            "MarigoldNormalsPipeline",
+        ]
+    )
     _import_structure["musicldm"] = ["MusicLDMPipeline"]
     _import_structure["paint_by_example"] = ["PaintByExamplePipeline"]
     _import_structure["pia"] = ["PIAPipeline"]
-    _import_structure["pixart_alpha"] = ["PixArtAlphaPipeline"]
+    _import_structure["pixart_alpha"] = ["PixArtAlphaPipeline", "PixArtSigmaPipeline"]
     _import_structure["semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"]
     _import_structure["shap_e"] = ["ShapEImg2ImgPipeline", "ShapEPipeline"]
     _import_structure["stable_cascade"] = [
@@ -361,7 +375,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from ..utils.dummy_torch_and_transformers_objects import *
     else:
         from .amused import AmusedImg2ImgPipeline, AmusedInpaintPipeline, AmusedPipeline
-        from .animatediff import AnimateDiffPipeline, AnimateDiffVideoToVideoPipeline
+        from .animatediff import AnimateDiffPipeline, AnimateDiffSDXLPipeline, AnimateDiffVideoToVideoPipeline
         from .audioldm import AudioLDMPipeline
         from .audioldm2 import (
             AudioLDM2Pipeline,
@@ -378,6 +392,10 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionXLControlNetInpaintPipeline,
             StableDiffusionXLControlNetPipeline,
         )
+        from .controlnet_xs import (
+            StableDiffusionControlNetXSPipeline,
+            StableDiffusionXLControlNetXSPipeline,
+        )
         from .deepfloyd_if import (
             IFImg2ImgPipeline,
             IFImg2ImgSuperResolutionPipeline,
@@ -437,10 +455,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             LEditsPPPipelineStableDiffusion,
             LEditsPPPipelineStableDiffusionXL,
         )
+        from .marigold import (
+            MarigoldDepthPipeline,
+            MarigoldNormalsPipeline,
+        )
         from .musicldm import MusicLDMPipeline
         from .paint_by_example import PaintByExamplePipeline
         from .pia import PIAPipeline
-        from .pixart_alpha import PixArtAlphaPipeline
+        from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
         from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
         from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
         from .stable_cascade import (
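Taken together, the `__init__.py` changes above register six new public pipeline classes. As a quick orientation (a sketch, not part of the diff), these names become importable from `diffusers.pipelines` once 0.28.0 is installed; the class names are exactly those added in the hunks above:

    # New pipeline classes exported in diffusers 0.28.0 (names taken from the diff above)
    from diffusers.pipelines import (
        AnimateDiffSDXLPipeline,
        MarigoldDepthPipeline,
        MarigoldNormalsPipeline,
        PixArtSigmaPipeline,
        StableDiffusionControlNetXSPipeline,
        StableDiffusionXLControlNetXSPipeline,
    )

As with the existing entries in `_import_structure`, these imports stay lazy: the heavy pipeline modules are only loaded when the attribute is first accessed (or eagerly under `TYPE_CHECKING`/`DIFFUSERS_SLOW_IMPORT`).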
diffusers/pipelines/amused/pipeline_amused.py
CHANGED
@@ -30,9 +30,7 @@ EXAMPLE_DOC_STRING = """
         >>> import torch
         >>> from diffusers import AmusedPipeline

-        >>> pipe = AmusedPipeline.from_pretrained(
-        ...     "amused/amused-512", variant="fp16", torch_dtype=torch.float16
-        ... )
+        >>> pipe = AmusedPipeline.from_pretrained("amused/amused-512", variant="fp16", torch_dtype=torch.float16)
         >>> pipe = pipe.to("cuda")

         >>> prompt = "a photo of an astronaut riding a horse on mars"
@@ -90,7 +88,7 @@ class AmusedPipeline(DiffusionPipeline):
         negative_encoder_hidden_states: Optional[torch.Tensor] = None,
         output_type="pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         micro_conditioning_aesthetic_score: int = 6,
@@ -124,16 +122,16 @@ class AmusedPipeline(DiffusionPipeline):
             latents (`torch.IntTensor`, *optional*):
                 Pre-generated tokens representing latent vectors in `self.vqvae`, to be used as inputs for image
                 gneration. If not provided, the starting latents will be completely masked.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument. A single vector from the
                 pooled and projected final hidden states.
-            encoder_hidden_states (`torch.FloatTensor`, *optional*):
+            encoder_hidden_states (`torch.Tensor`, *optional*):
                 Pre-generated penultimate hidden states from the text encoder providing additional text conditioning.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
-            negative_encoder_hidden_states (`torch.FloatTensor`, *optional*):
+            negative_encoder_hidden_states (`torch.Tensor`, *optional*):
                 Analogous to `encoder_hidden_states` for the positive prompt.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
@@ -142,7 +140,7 @@ class AmusedPipeline(DiffusionPipeline):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
@@ -150,10 +148,12 @@ class AmusedPipeline(DiffusionPipeline):
                 A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
                 [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             micro_conditioning_aesthetic_score (`int`, *optional*, defaults to 6):
-                The targeted aesthetic score according to the laion aesthetic classifier. See https://laion.ai/blog/laion-aesthetics/
-                and the micro-conditioning section of https://arxiv.org/abs/2307.01952.
+                The targeted aesthetic score according to the laion aesthetic classifier. See
+                https://laion.ai/blog/laion-aesthetics/ and the micro-conditioning section of
+                https://arxiv.org/abs/2307.01952.
             micro_conditioning_crop_coord (`Tuple[int]`, *optional*, defaults to (0, 0)):
-                The targeted height, width crop coordinates. See the micro-conditioning section of https://arxiv.org/abs/2307.01952.
+                The targeted height, width crop coordinates. See the micro-conditioning section of
+                https://arxiv.org/abs/2307.01952.
             temperature (`Union[int, Tuple[int, int], List[int]]`, *optional*, defaults to (2, 0)):
                 Configures the temperature scheduler on `self.scheduler` see `AmusedScheduler#set_timesteps`.

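The recurring `torch.FloatTensor` → `torch.Tensor` edits in this and the following aMUSEd files only widen the type hints; the runtime behavior of `callback` is unchanged. A minimal sketch of a progress callback matching the new annotation (the checkpoint name comes from the example docstring above; the callback body itself is illustrative, not taken from the diff):

    import torch
    from diffusers import AmusedPipeline

    pipe = AmusedPipeline.from_pretrained("amused/amused-512", variant="fp16", torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    def log_step(step: int, timestep: int, latents: torch.Tensor) -> None:
        # Invoked every `callback_steps` steps during sampling.
        print(f"step={step} timestep={timestep} latents shape={tuple(latents.shape)}")

    prompt = "a photo of an astronaut riding a horse on mars"
    image = pipe(prompt, callback=log_step, callback_steps=1).images[0]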
diffusers/pipelines/amused/pipeline_amused_img2img.py
CHANGED
@@ -102,7 +102,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
         negative_encoder_hidden_states: Optional[torch.Tensor] = None,
         output_type="pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         micro_conditioning_aesthetic_score: int = 6,
@@ -115,7 +115,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
                 numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
                 or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
@@ -127,7 +127,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
                 on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising
                 process runs for the full number of iterations specified in `num_inference_steps`. A value of 1
                 essentially ignores `image`.
-            num_inference_steps (`int`, *optional*, defaults to
+            num_inference_steps (`int`, *optional*, defaults to 12):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, *optional*, defaults to 10.0):
@@ -141,16 +141,16 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
             generator (`torch.Generator`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument. A single vector from the
                 pooled and projected final hidden states.
-            encoder_hidden_states (`torch.FloatTensor`, *optional*):
+            encoder_hidden_states (`torch.Tensor`, *optional*):
                 Pre-generated penultimate hidden states from the text encoder providing additional text conditioning.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
-            negative_encoder_hidden_states (`torch.FloatTensor`, *optional*):
+            negative_encoder_hidden_states (`torch.Tensor`, *optional*):
                 Analogous to `encoder_hidden_states` for the positive prompt.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
@@ -159,7 +159,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
@@ -167,10 +167,12 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
                 A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
                 [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             micro_conditioning_aesthetic_score (`int`, *optional*, defaults to 6):
-                The targeted aesthetic score according to the laion aesthetic classifier. See https://laion.ai/blog/laion-aesthetics/
-                and the micro-conditioning section of https://arxiv.org/abs/2307.01952.
+                The targeted aesthetic score according to the laion aesthetic classifier. See
+                https://laion.ai/blog/laion-aesthetics/ and the micro-conditioning section of
+                https://arxiv.org/abs/2307.01952.
             micro_conditioning_crop_coord (`Tuple[int]`, *optional*, defaults to (0, 0)):
-                The targeted height, width crop coordinates. See the micro-conditioning section of https://arxiv.org/abs/2307.01952.
+                The targeted height, width crop coordinates. See the micro-conditioning section of
+                https://arxiv.org/abs/2307.01952.
             temperature (`Union[int, Tuple[int, int], List[int]]`, *optional*, defaults to (2, 0)):
                 Configures the temperature scheduler on `self.scheduler` see `AmusedScheduler#set_timesteps`.

@@ -191,7 +193,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
             negative_prompt_embeds is None and negative_encoder_hidden_states is not None
         ):
             raise ValueError(
-                "pass either both `
+                "pass either both `negative_prompt_embeds` and `negative_encoder_hidden_states` or neither"
             )

         if (prompt is None and prompt_embeds is None) or (prompt is not None and prompt_embeds is not None):
diffusers/pipelines/amused/pipeline_amused_inpaint.py
CHANGED
@@ -119,7 +119,7 @@ class AmusedInpaintPipeline(DiffusionPipeline):
         negative_encoder_hidden_states: Optional[torch.Tensor] = None,
         output_type="pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         micro_conditioning_aesthetic_score: int = 6,
@@ -132,13 +132,13 @@ class AmusedInpaintPipeline(DiffusionPipeline):
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
                 numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
                 or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
                 list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image
                 latents as `image`, but if passing latents directly it is not encoded again.
-            mask_image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+            mask_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, numpy array or tensor representing an image batch to mask `image`. White pixels in the mask
                 are repainted while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
                 single channel (luminance) before use. If it's a numpy array or pytorch tensor, it should contain one
@@ -165,16 +165,16 @@ class AmusedInpaintPipeline(DiffusionPipeline):
             generator (`torch.Generator`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument. A single vector from the
                 pooled and projected final hidden states.
-            encoder_hidden_states (`torch.FloatTensor`, *optional*):
+            encoder_hidden_states (`torch.Tensor`, *optional*):
                 Pre-generated penultimate hidden states from the text encoder providing additional text conditioning.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
-            negative_encoder_hidden_states (`torch.FloatTensor`, *optional*):
+            negative_encoder_hidden_states (`torch.Tensor`, *optional*):
                 Analogous to `encoder_hidden_states` for the positive prompt.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
@@ -183,7 +183,7 @@ class AmusedInpaintPipeline(DiffusionPipeline):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
@@ -191,10 +191,12 @@ class AmusedInpaintPipeline(DiffusionPipeline):
                 A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
                 [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             micro_conditioning_aesthetic_score (`int`, *optional*, defaults to 6):
-                The targeted aesthetic score according to the laion aesthetic classifier. See https://laion.ai/blog/laion-aesthetics/
-                and the micro-conditioning section of https://arxiv.org/abs/2307.01952.
+                The targeted aesthetic score according to the laion aesthetic classifier. See
+                https://laion.ai/blog/laion-aesthetics/ and the micro-conditioning section of
+                https://arxiv.org/abs/2307.01952.
             micro_conditioning_crop_coord (`Tuple[int]`, *optional*, defaults to (0, 0)):
-                The targeted height, width crop coordinates. See the micro-conditioning section of https://arxiv.org/abs/2307.01952.
+                The targeted height, width crop coordinates. See the micro-conditioning section of
+                https://arxiv.org/abs/2307.01952.
             temperature (`Union[int, Tuple[int, int], List[int]]`, *optional*, defaults to (2, 0)):
                 Configures the temperature scheduler on `self.scheduler` see `AmusedScheduler#set_timesteps`.

diffusers/pipelines/animatediff/__init__.py
CHANGED
@@ -22,6 +22,7 @@ except OptionalDependencyNotAvailable:
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
     _import_structure["pipeline_animatediff"] = ["AnimateDiffPipeline"]
+    _import_structure["pipeline_animatediff_sdxl"] = ["AnimateDiffSDXLPipeline"]
     _import_structure["pipeline_animatediff_video2video"] = ["AnimateDiffVideoToVideoPipeline"]

 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
@@ -33,6 +34,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:

     else:
         from .pipeline_animatediff import AnimateDiffPipeline
+        from .pipeline_animatediff_sdxl import AnimateDiffSDXLPipeline
         from .pipeline_animatediff_video2video import AnimateDiffVideoToVideoPipeline
         from .pipeline_output import AnimateDiffPipelineOutput

diffusers/pipelines/animatediff/pipeline_animatediff.py
CHANGED
@@ -15,11 +15,10 @@
 import inspect
 from typing import Any, Callable, Dict, List, Optional, Union

-import numpy as np
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

-from ...image_processor import PipelineImageInput, VaeImageProcessor
+from ...image_processor import PipelineImageInput
 from ...loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
 from ...models.lora import adjust_lora_scale_text_encoder
@@ -41,6 +40,7 @@ from ...utils import (
     unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
+from ...video_processor import VideoProcessor
 from ..free_init_utils import FreeInitMixin
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput
@@ -65,27 +65,6 @@ EXAMPLE_DOC_STRING = """
 """


-def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: str = "np"):
-    batch_size, channels, num_frames, height, width = video.shape
-    outputs = []
-    for batch_idx in range(batch_size):
-        batch_vid = video[batch_idx].permute(1, 0, 2, 3)
-        batch_output = processor.postprocess(batch_vid, output_type)
-
-        outputs.append(batch_output)
-
-    if output_type == "np":
-        outputs = np.stack(outputs)
-
-    elif output_type == "pt":
-        outputs = torch.stack(outputs)
-
-    elif not output_type == "pil":
-        raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']")
-
-    return outputs
-
-
 class AnimateDiffPipeline(
     DiffusionPipeline,
     StableDiffusionMixin,
@@ -131,7 +110,7 @@ class AnimateDiffPipeline(
         vae: AutoencoderKL,
         text_encoder: CLIPTextModel,
         tokenizer: CLIPTokenizer,
-        unet: UNet2DConditionModel,
+        unet: Union[UNet2DConditionModel, UNetMotionModel],
         motion_adapter: MotionAdapter,
         scheduler: Union[
             DDIMScheduler,
@@ -159,7 +138,7 @@ class AnimateDiffPipeline(
             image_encoder=image_encoder,
         )
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+        self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt
     def encode_prompt(
@@ -169,8 +148,8 @@ class AnimateDiffPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -190,10 +169,10 @@ class AnimateDiffPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -584,11 +563,11 @@ class AnimateDiffPipeline(
         num_videos_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -625,27 +604,26 @@ class AnimateDiffPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`. Latents should be of shape
                 `(batch_size, num_channel, num_frames, height, width)`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*):
                 Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
-                The output format of the generated video. Choose between `torch.FloatTensor`, `PIL.Image` or
-                `np.array`.
+                The output format of the generated video. Choose between `torch.Tensor`, `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] instead
                 of a plain tuple.
@@ -663,7 +641,7 @@ class AnimateDiffPipeline(
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your
+                `._callback_tensor_inputs` attribute of your pipeline class.

         Examples:

@@ -792,7 +770,7 @@ class AnimateDiffPipeline(
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order

         # 8. Denoising loop
-        with self.progress_bar(total=num_inference_steps) as progress_bar:
+        with self.progress_bar(total=self._num_timesteps) as progress_bar:
             for i, t in enumerate(timesteps):
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
@@ -836,7 +814,7 @@ class AnimateDiffPipeline(
             video = latents
         else:
             video_tensor = self.decode_latents(latents)
-            video = tensor2vid(video_tensor, self.image_processor, output_type=output_type)
+            video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

         # 10. Offload all models
         self.maybe_free_model_hooks()