diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +26 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +33 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +8 -0
- diffusers/models/activations.py +23 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +475 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +363 -32
- diffusers/models/model_loading_utils.py +177 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_outputs.py +14 -0
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +175 -99
- diffusers/models/normalization.py +2 -1
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/__init__.py +3 -0
- diffusers/models/transformers/dit_transformer_2d.py +240 -0
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/hunyuan_transformer_2d.py +427 -0
- diffusers/models/transformers/pixart_transformer_2d.py +336 -0
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +292 -184
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +19 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +27 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +7 -4
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/hunyuandit/__init__.py +48 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +881 -0
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +269 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +69 -79
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +20 -26
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +42 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
- diffusers/schedulers/scheduling_edm_euler.py +50 -31
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
- diffusers/schedulers/scheduling_euler_discrete.py +160 -68
- diffusers/schedulers/scheduling_heun_discrete.py +57 -39
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +24 -26
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +75 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/METADATA +7 -7
- diffusers-0.28.1.dist-info/RECORD +419 -0
- diffusers-0.27.2.dist-info/RECORD +0 -399
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/LICENSE +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/WHEEL +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/musicldm/pipeline_musicldm.py

@@ -120,8 +120,8 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_waveforms_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
     ):
         r"""
         Encodes the prompt into text encoder hidden states.
@@ -139,10 +139,10 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
                 The prompt or prompts not to guide the audio generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -363,8 +363,8 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
         shape = (
             batch_size,
             num_channels_latents,
-            height // self.vae_scale_factor,
-            self.vocoder.config.model_in_dim // self.vae_scale_factor,
+            int(height) // self.vae_scale_factor,
+            int(self.vocoder.config.model_in_dim) // self.vae_scale_factor,
         )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
@@ -427,11 +427,11 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_waveforms_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: Optional[int] = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         output_type: Optional[str] = "np",
@@ -465,21 +465,21 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
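The `prepare_latents` change above guards against non-integer `height` values reaching the latent shape. A minimal sketch of the failure mode it avoids (the `vae_scale_factor` of 8 and the float height are illustrative placeholders, not values taken from the diff):

```python
import torch

vae_scale_factor = 8
height = 512.0  # e.g. a value that arrived as a float from a config or CLI argument

# 0.27.2-style expression: float // int stays a float, and torch.randn rejects float sizes.
shape = (1, 8, height // vae_scale_factor, 64)
try:
    torch.randn(shape)
except TypeError as err:
    print("rejected:", err)

# 0.28.x-style expression: coercing to int first keeps every dimension an integer.
shape = (1, 8, int(height) // vae_scale_factor, 64)
print(torch.randn(shape).shape)  # torch.Size([1, 8, 64, 64])
```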
diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py

@@ -266,7 +266,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
             and not isinstance(image, list)
         ):
             raise ValueError(
-                "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
+                "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
                 f" {type(image)}"
             )

@@ -283,7 +283,12 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -388,9 +393,9 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
     @torch.no_grad()
     def __call__(
         self,
-        example_image: Union[torch.FloatTensor, PIL.Image.Image],
-        image: Union[torch.FloatTensor, PIL.Image.Image],
-        mask_image: Union[torch.FloatTensor, PIL.Image.Image],
+        example_image: Union[torch.Tensor, PIL.Image.Image],
+        image: Union[torch.Tensor, PIL.Image.Image],
+        mask_image: Union[torch.Tensor, PIL.Image.Image],
         height: Optional[int] = None,
         width: Optional[int] = None,
         num_inference_steps: int = 50,
@@ -399,22 +404,22 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
     ):
         r"""
         The call function to the pipeline for generation.

         Args:
-            example_image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            example_image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 An example image to guide image generation.
-            image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 `Image` or tensor representing an image batch to be inpainted (parts of the image are masked out with
                 `mask_image` and repainted according to `prompt`).
-            mask_image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            mask_image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 `Image` or tensor representing an image batch to mask `image`. White pixels in the mask are repainted,
                 while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a single channel
                 (luminance) before use. If it's a tensor, it should contain one color channel (L) instead of 3, so the
@@ -440,7 +445,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
@@ -451,7 +456,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
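Both signatures above keep the step-callback contract and only retype `latents` as `torch.Tensor`. A hedged sketch of a callback matching the documented `callback(step: int, timestep: int, latents: torch.Tensor)` shape (the pipeline object `pipe` and its image inputs are assumed to already exist; they are not part of the diff):

```python
import torch

def log_latents(step: int, timestep: int, latents: torch.Tensor) -> None:
    # Called every `callback_steps` denoising steps with the current latent batch.
    print(f"step={step} timestep={timestep} latents_mean={latents.mean().item():.4f}")

# Assumed usage with an already constructed PaintByExamplePipeline:
# result = pipe(example_image=example, image=init, mask_image=mask,
#               callback=log_latents, callback_steps=5)
```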
diffusers/pipelines/pia/pipeline_pia.py

@@ -13,17 +13,15 @@
 # limitations under the License.

 import inspect
-import math
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Union

 import numpy as np
 import PIL
 import torch
-import torch.fft as fft
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

-from ...image_processor import PipelineImageInput, VaeImageProcessor
+from ...image_processor import PipelineImageInput
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
 from ...models.lora import adjust_lora_scale_text_encoder
@@ -45,6 +43,7 @@ from ...utils import (
     unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
+from ...video_processor import VideoProcessor
 from ..free_init_utils import FreeInitMixin
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin

@@ -61,6 +60,7 @@ EXAMPLE_DOC_STRING = """
         ... PIAPipeline,
         ... )
         >>> from diffusers.utils import export_to_gif, load_image
+
         >>> adapter = MotionAdapter.from_pretrained("../checkpoints/pia-diffusers")
         >>> pipe = PIAPipeline.from_pretrained("SG161222/Realistic_Vision_V6.0_B1_noVAE", motion_adapter=adapter)
         >>> pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
@@ -90,28 +90,6 @@ RANGE_LIST = [
 ]


-# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.tensor2vid
-def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: str = "np"):
-    batch_size, channels, num_frames, height, width = video.shape
-    outputs = []
-    for batch_idx in range(batch_size):
-        batch_vid = video[batch_idx].permute(1, 0, 2, 3)
-        batch_output = processor.postprocess(batch_vid, output_type)
-
-        outputs.append(batch_output)
-
-    if output_type == "np":
-        outputs = np.stack(outputs)
-
-    elif output_type == "pt":
-        outputs = torch.stack(outputs)
-
-    elif not output_type == "pil":
-        raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']")
-
-    return outputs
-
-
 def prepare_mask_coef_by_statistics(num_frames: int, cond_frame: int, motion_scale: int):
     assert num_frames > 0, "video_length should be greater than 0"

@@ -130,71 +108,6 @@ def prepare_mask_coef_by_statistics(num_frames: int, cond_frame: int, motion_scale: int):
     return coef


-def _get_freeinit_freq_filter(
-    shape: Tuple[int, ...],
-    device: Union[str, torch.dtype],
-    filter_type: str,
-    order: float,
-    spatial_stop_frequency: float,
-    temporal_stop_frequency: float,
-) -> torch.Tensor:
-    r"""Returns the FreeInit filter based on filter type and other input conditions."""
-
-    time, height, width = shape[-3], shape[-2], shape[-1]
-    mask = torch.zeros(shape)
-
-    if spatial_stop_frequency == 0 or temporal_stop_frequency == 0:
-        return mask
-
-    if filter_type == "butterworth":
-
-        def retrieve_mask(x):
-            return 1 / (1 + (x / spatial_stop_frequency**2) ** order)
-    elif filter_type == "gaussian":
-
-        def retrieve_mask(x):
-            return math.exp(-1 / (2 * spatial_stop_frequency**2) * x)
-    elif filter_type == "ideal":
-
-        def retrieve_mask(x):
-            return 1 if x <= spatial_stop_frequency * 2 else 0
-    else:
-        raise NotImplementedError("`filter_type` must be one of gaussian, butterworth or ideal")
-
-    for t in range(time):
-        for h in range(height):
-            for w in range(width):
-                d_square = (
-                    ((spatial_stop_frequency / temporal_stop_frequency) * (2 * t / time - 1)) ** 2
-                    + (2 * h / height - 1) ** 2
-                    + (2 * w / width - 1) ** 2
-                )
-                mask[..., t, h, w] = retrieve_mask(d_square)
-
-    return mask.to(device)
-
-
-def _freq_mix_3d(x: torch.Tensor, noise: torch.Tensor, LPF: torch.Tensor) -> torch.Tensor:
-    r"""Noise reinitialization."""
-    # FFT
-    x_freq = fft.fftn(x, dim=(-3, -2, -1))
-    x_freq = fft.fftshift(x_freq, dim=(-3, -2, -1))
-    noise_freq = fft.fftn(noise, dim=(-3, -2, -1))
-    noise_freq = fft.fftshift(noise_freq, dim=(-3, -2, -1))
-
-    # frequency mix
-    HPF = 1 - LPF
-    x_freq_low = x_freq * LPF
-    noise_freq_high = noise_freq * HPF
-    x_freq_mixed = x_freq_low + noise_freq_high  # mix in freq domain
-
-    # IFFT
-    x_freq_mixed = fft.ifftshift(x_freq_mixed, dim=(-3, -2, -1))
-    x_mixed = fft.ifftn(x_freq_mixed, dim=(-3, -2, -1)).real
-
-    return x_mixed
-
-
 @dataclass
 class PIAPipelineOutput(BaseOutput):
     r"""
@@ -202,9 +115,9 @@ class PIAPipelineOutput(BaseOutput):

     Args:
         frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
-
-
-
+            Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`, NumPy array of
+            shape `(batch_size, num_frames, channels, height, width, Torch tensor of shape `(batch_size, num_frames,
+            channels, height, width)`.
     """

     frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
@@ -284,7 +197,7 @@ class PIAPipeline(
             image_encoder=image_encoder,
         )
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+        self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt
     def encode_prompt(
@@ -294,8 +207,8 @@ class PIAPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -315,10 +228,10 @@ class PIAPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -687,7 +600,7 @@ class PIAPipeline(
         )
         _, _, _, scaled_height, scaled_width = shape

-        image = self.image_processor.preprocess(image)
+        image = self.video_processor.preprocess(image)
         image = image.to(device, dtype)

         if isinstance(generator, list):
@@ -767,11 +680,11 @@ class PIAPipeline(
         num_videos_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         motion_scale: int = 0,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
@@ -788,7 +701,8 @@ class PIAPipeline(
                 The input image to be used for video generation.
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            strength (`float`, *optional*, defaults to 1.0):
+            strength (`float`, *optional*, defaults to 1.0):
+                Indicates extent to transform the reference `image`. Must be between 0 and 1.
             height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The height in pixels of the generated video.
             width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
@@ -811,33 +725,31 @@ class PIAPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`. Latents should be of shape
                 `(batch_size, num_channel, num_frames, height, width)`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*):
                 Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
-                if `do_classifier_free_guidance` is set to `True`.
-
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             motion_scale: (`int`, *optional*, defaults to 0):
-                Parameter that controls the amount and type of motion that is added to the image. Increasing the value
-                ranges of values control the type of motion that is
-                Set between 0-2 to only increase the amount of motion.
-                Set between
-                Set between 6-8 to perform motion with image style transfer.
+                Parameter that controls the amount and type of motion that is added to the image. Increasing the value
+                increases the amount of motion, while specific ranges of values control the type of motion that is
+                added. Must be between 0 and 8. Set between 0-2 to only increase the amount of motion. Set between 3-5
+                to create looping motion. Set between 6-8 to perform motion with image style transfer.
             output_type (`str`, *optional*, defaults to `"pil"`):
-                The output format of the generated video. Choose between `torch.FloatTensor`, `PIL.Image` or
-                `np.array`.
+                The output format of the generated video. Choose between `torch.Tensor`, `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] instead
                 of a plain tuple.
@@ -855,14 +767,14 @@ class PIAPipeline(
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your
+                `._callback_tensor_inputs` attribute of your pipeline class.

         Examples:

         Returns:
             [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] or `tuple`:
-                If `return_dict` is `True`, [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] is
-
+                If `return_dict` is `True`, [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] is returned, otherwise a
+                `tuple` is returned where the first element is a list with the generated frames.
         """
         # 0. Default height and width to unet
         height = height or self.unet.config.sample_size * self.vae_scale_factor
@@ -979,8 +891,10 @@ class PIAPipeline(
                     latents, free_init_iter, num_inference_steps, device, latents.dtype, generator
                 )

+            self._num_timesteps = len(timesteps)
             num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
-            with self.progress_bar(total=num_inference_steps) as progress_bar:
+
+            with self.progress_bar(total=self._num_timesteps) as progress_bar:
                 for i, t in enumerate(timesteps):
                     # expand the latents if we are doing classifier free guidance
                     latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
@@ -1023,7 +937,7 @@ class PIAPipeline(
             video = latents
         else:
             video_tensor = self.decode_latents(latents)
-            video = tensor2vid(video_tensor, self.image_processor, output_type=output_type)
+            video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

         # 10. Offload all models
         self.maybe_free_model_hooks()
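The PIA hunks above swap the module-level `tensor2vid` helper for the new `VideoProcessor` (see `diffusers/video_processor.py +113 -0` in the file list). A minimal sketch of the replacement path, assuming diffusers 0.28.x is installed; the scale factor of 8 and the random tensor stand in for a real VAE and decoded latents:

```python
import torch
from diffusers.video_processor import VideoProcessor

# Mirrors the pipeline's constructor call shown in the diff:
# VideoProcessor(do_resize=False, vae_scale_factor=...)
video_processor = VideoProcessor(do_resize=False, vae_scale_factor=8)

# Placeholder for `self.decode_latents(latents)` output: (batch, channels, frames, height, width).
video_tensor = torch.rand(1, 3, 16, 64, 64)

# Mirrors the pipeline's new post-processing call; "np", "pt" and "pil" remain the accepted output types.
frames = video_processor.postprocess_video(video=video_tensor, output_type="np")
print(frames.shape)  # (1, 16, 64, 64, 3)
```

The reworked `motion_scale` docstring in the same hunks now states the full contract: values 0-2 only increase the amount of motion, 3-5 produce looping motion, and 6-8 add motion with image style transfer.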
diffusers/pipelines/pipeline_flax_utils.py

@@ -254,9 +254,9 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download
-
-
+            resume_download:
+                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -316,7 +316,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
         ```
         """
         cache_dir = kwargs.pop("cache_dir", None)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", False)
         token = kwargs.pop("token", None)
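The `resume_download` change above follows the huggingface_hub deprecation: the kwarg is still popped but now defaults to `None` and is ignored, so callers can simply drop it. A hedged sketch of caller-side code (the checkpoint id, revision, and dtype come from the diffusers Flax documentation rather than from this diff, and jax/flax must be installed):

```python
import jax.numpy as jnp
from diffusers import FlaxStableDiffusionPipeline

# Passing resume_download=True still works in 0.28.x but has no effect;
# omitting it is the forward-compatible spelling.
pipe, params = FlaxStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="bf16",
    dtype=jnp.bfloat16,
)
```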