diffusers-0.27.1-py3-none-any.whl → diffusers-0.28.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
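
The listing above is a file-level summary; the remainder of the page expands the line-level changes for `diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py`. If you want to reproduce this kind of per-file summary yourself, a rough standard-library sketch is shown below. The wheel paths are placeholders for wherever you downloaded the two releases, and the `difflib` line counts will not necessarily match the registry's own diff algorithm exactly.

```python
# Rough sketch: build a per-file "+added -removed" summary for two wheels using
# only the standard library. Wheel paths are assumptions (wherever you saved
# them); counts from difflib may differ slightly from the registry's diff.
import difflib
import zipfile


def wheel_texts(path):
    """Return {archive name: list of source lines} for Python files in a wheel."""
    texts = {}
    with zipfile.ZipFile(path) as zf:
        for name in zf.namelist():
            if name.endswith(".py"):
                texts[name] = zf.read(name).decode("utf-8", errors="replace").splitlines()
    return texts


old = wheel_texts("diffusers-0.27.1-py3-none-any.whl")
new = wheel_texts("diffusers-0.28.0-py3-none-any.whl")

for name in sorted(set(old) | set(new)):
    a, b = old.get(name, []), new.get(name, [])
    added = removed = 0
    for line in difflib.unified_diff(a, b, lineterm="", n=0):
        if line.startswith("+") and not line.startswith("+++"):
            added += 1
        elif line.startswith("-") and not line.startswith("---"):
            removed += 1
    if added or removed:
        print(f"{name} +{added} -{removed}")
```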
diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py
--- 0.27.1
+++ 0.28.0
@@ -19,10 +19,9 @@ import urllib.parse as ul
 from typing import Callable, List, Optional, Tuple, Union

 import torch
-import torch.nn.functional as F
 from transformers import T5EncoderModel, T5Tokenizer

-from ...image_processor import VaeImageProcessor
+from ...image_processor import PixArtImageProcessor
 from ...models import AutoencoderKL, Transformer2DModel
 from ...schedulers import DPMSolverMultistepScheduler
 from ...utils import (
@@ -176,6 +175,7 @@ def retrieve_timesteps(
     num_inference_steps: Optional[int] = None,
     device: Optional[Union[str, torch.device]] = None,
     timesteps: Optional[List[int]] = None,
+    sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
     """
@@ -186,19 +186,23 @@ def retrieve_timesteps(
         scheduler (`SchedulerMixin`):
             The scheduler to get timesteps from.
         num_inference_steps (`int`):
-            The number of diffusion steps used when generating samples with a pre-trained model. If used,
-            `timesteps` must be `None`.
+            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
+            must be `None`.
         device (`str` or `torch.device`, *optional*):
             The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         timesteps (`List[int]`, *optional*):
-            …
-            …
-            …
+            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
+            `num_inference_steps` and `sigmas` must be `None`.
+        sigmas (`List[float]`, *optional*):
+            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
+            `num_inference_steps` and `timesteps` must be `None`.

     Returns:
         `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
         second element is the number of inference steps.
     """
+    if timesteps is not None and sigmas is not None:
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
     if timesteps is not None:
         accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
         if not accepts_timesteps:
@@ -209,6 +213,16 @@ def retrieve_timesteps(
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         timesteps = scheduler.timesteps
         num_inference_steps = len(timesteps)
+    elif sigmas is not None:
+        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
+        if not accept_sigmas:
+            raise ValueError(
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" sigmas schedules. Please check whether you are using the correct scheduler."
+            )
+        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
+        timesteps = scheduler.timesteps
+        num_inference_steps = len(timesteps)
     else:
         scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
         timesteps = scheduler.timesteps
@@ -272,16 +286,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):
         )

         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
-
-    # Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/utils.py
-    def mask_text_embeddings(self, emb, mask):
-        if emb.shape[0] == 1:
-            keep_index = mask.sum().item()
-            return emb[:, :, :keep_index, :], keep_index
-        else:
-            masked_feature = emb * mask[:, None, :, None]
-            return masked_feature, emb.shape[2]
+        self.image_processor = PixArtImageProcessor(vae_scale_factor=self.vae_scale_factor)

     # Adapted from diffusers.pipelines.deepfloyd_if.pipeline_if.encode_prompt
     def encode_prompt(
@@ -291,10 +296,10 @@ class PixArtAlphaPipeline(DiffusionPipeline):
         negative_prompt: str = "",
         num_images_per_prompt: int = 1,
         device: Optional[torch.device] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-        prompt_attention_mask: Optional[torch.FloatTensor] = None,
-        negative_prompt_attention_mask: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
+        prompt_attention_mask: Optional[torch.Tensor] = None,
+        negative_prompt_attention_mask: Optional[torch.Tensor] = None,
         clean_caption: bool = False,
         max_sequence_length: int = 120,
         **kwargs,
@@ -315,10 +320,10 @@ class PixArtAlphaPipeline(DiffusionPipeline):
                 number of images that should be generated per prompt
             device: (`torch.device`, *optional*):
                 torch device to place the resulting embeddings on
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. For PixArt-Alpha, it's should be the embeddings of the ""
                 string.
             clean_caption (`bool`, defaults to `False`):
@@ -361,7 +366,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):
             ):
                 removed_text = self.tokenizer.batch_decode(untruncated_ids[:, max_length - 1 : -1])
                 logger.warning(
-                    "The following part of your input was truncated because
+                    "The following part of your input was truncated because T5 can only handle sequences up to"
                     f" {max_length} tokens: {removed_text}"
                 )

@@ -653,7 +658,12 @@ class PixArtAlphaPipeline(DiffusionPipeline):

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -669,38 +679,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
         latents = latents * self.scheduler.init_noise_sigma
         return latents

-    @staticmethod
-    def classify_height_width_bin(height: int, width: int, ratios: dict) -> Tuple[int, int]:
-        """Returns binned height and width."""
-        ar = float(height / width)
-        closest_ratio = min(ratios.keys(), key=lambda ratio: abs(float(ratio) - ar))
-        default_hw = ratios[closest_ratio]
-        return int(default_hw[0]), int(default_hw[1])
-
-    @staticmethod
-    def resize_and_crop_tensor(samples: torch.Tensor, new_width: int, new_height: int) -> torch.Tensor:
-        orig_height, orig_width = samples.shape[2], samples.shape[3]
-
-        # Check if resizing is needed
-        if orig_height != new_height or orig_width != new_width:
-            ratio = max(new_height / orig_height, new_width / orig_width)
-            resized_width = int(orig_width * ratio)
-            resized_height = int(orig_height * ratio)
-
-            # Resize
-            samples = F.interpolate(
-                samples, size=(resized_height, resized_width), mode="bilinear", align_corners=False
-            )
-
-            # Center Crop
-            start_x = (resized_width - new_width) // 2
-            end_x = start_x + new_width
-            start_y = (resized_height - new_height) // 2
-            end_y = start_y + new_height
-            samples = samples[:, :, start_y:end_y, start_x:end_x]
-
-        return samples
-
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -709,20 +687,21 @@ class PixArtAlphaPipeline(DiffusionPipeline):
         negative_prompt: str = "",
         num_inference_steps: int = 20,
         timesteps: List[int] = None,
+        sigmas: List[float] = None,
         guidance_scale: float = 4.5,
         num_images_per_prompt: Optional[int] = 1,
         height: Optional[int] = None,
         width: Optional[int] = None,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        prompt_attention_mask: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_attention_mask: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        prompt_attention_mask: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_attention_mask: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         clean_caption: bool = True,
         use_resolution_binning: bool = True,
@@ -744,8 +723,13 @@ class PixArtAlphaPipeline(DiffusionPipeline):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             timesteps (`List[int]`, *optional*):
-                Custom timesteps to use for the denoising process
-                …
+                Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
+                in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
+                passed will be used. Must be in descending order.
+            sigmas (`List[float]`, *optional*):
+                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
+                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
+                will be used.
             guidance_scale (`float`, *optional*, defaults to 4.5):
                 Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
                 `guidance_scale` is defined as `w` of equation 2. of [Imagen
@@ -764,18 +748,18 @@ class PixArtAlphaPipeline(DiffusionPipeline):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            prompt_attention_mask (`torch.FloatTensor`, *optional*): Pre-generated attention mask for text embeddings.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_attention_mask (`torch.Tensor`, *optional*): Pre-generated attention mask for text embeddings.
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. For PixArt-Alpha this negative prompt should be "". If not
                 provided, negative_prompt_embeds will be generated from `negative_prompt` input argument.
-            negative_prompt_attention_mask (`torch.FloatTensor`, *optional*):
+            negative_prompt_attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated attention mask for negative text embeddings.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generate image. Choose between
@@ -784,7 +768,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):
                 Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
             callback (`Callable`, *optional*):
                 A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
@@ -821,7 +805,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):
             else:
                 raise ValueError("Invalid sample size")
             orig_height, orig_width = height, width
-            height, width = self.classify_height_width_bin(height, width, ratios=aspect_ratio_bin)
+            height, width = self.image_processor.classify_height_width_bin(height, width, ratios=aspect_ratio_bin)

         self.check_inputs(
             prompt,
@@ -874,7 +858,9 @@ class PixArtAlphaPipeline(DiffusionPipeline):
             prompt_attention_mask = torch.cat([negative_prompt_attention_mask, prompt_attention_mask], dim=0)

         # 4. Prepare timesteps
-        timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
+        timesteps, num_inference_steps = retrieve_timesteps(
+            self.scheduler, num_inference_steps, device, timesteps, sigmas
+        )

         # 5. Prepare latents.
         latent_channels = self.transformer.config.in_channels
@@ -951,7 +937,11 @@ class PixArtAlphaPipeline(DiffusionPipeline):
                     noise_pred = noise_pred

                 # compute previous image: x_t -> x_t-1
-                latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+                if num_inference_steps == 1:
+                    # For DMD one step sampling: https://arxiv.org/abs/2311.18828
+                    latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).pred_original_sample
+                else:
+                    latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]

                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -963,7 +953,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
             if use_resolution_binning:
-                image = self.image_processor.resize_and_crop_tensor(image, orig_width, orig_height)
+                image = self.image_processor.resize_and_crop_tensor(image, orig_width, orig_height)
         else:
             image = latents

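
The functional change threaded through this file is that `retrieve_timesteps` (and therefore `PixArtAlphaPipeline.__call__`) now accepts a custom `sigmas` schedule as an alternative to custom `timesteps`, guarded so that only one of the two may be passed. The sketch below exercises that new branch against a stub scheduler so it runs without downloading any weights; the stub class and the example sigma values are assumptions for illustration, only the `retrieve_timesteps` behavior itself comes from the diff above.

```python
# Sketch of the `sigmas` handling added to retrieve_timesteps in 0.28.0.
# The stub scheduler and the sigma values are illustrative assumptions.
import torch

from diffusers.pipelines.pixart_alpha.pipeline_pixart_alpha import retrieve_timesteps


class StubScheduler:
    """Minimal scheduler exposing the duck-typed surface retrieve_timesteps inspects."""

    def set_timesteps(self, num_inference_steps=None, device=None, timesteps=None, sigmas=None):
        if sigmas is not None:
            # The trailing 0.0 sigma is terminal and does not get its own step.
            self.timesteps = torch.arange(len(sigmas) - 1)
        elif timesteps is not None:
            self.timesteps = torch.tensor(timesteps)
        else:
            self.timesteps = torch.arange(num_inference_steps)


scheduler = StubScheduler()

# New in 0.28.0: a custom sigma schedule is forwarded to scheduler.set_timesteps(sigmas=...),
# provided the scheduler's set_timesteps signature accepts a `sigmas` argument.
timesteps, num_steps = retrieve_timesteps(scheduler, sigmas=[14.6, 7.0, 3.5, 1.7, 0.0])
print(num_steps)  # 4

# Passing both custom schedules trips the guard added at the top of the function.
try:
    retrieve_timesteps(scheduler, timesteps=[999, 500, 1], sigmas=[1.0, 0.5, 0.0])
except ValueError as err:
    print(err)  # Only one of `timesteps` or `sigmas` can be passed. ...
```

Together with the `torch.FloatTensor` → `torch.Tensor` annotation cleanup and the move of the resolution-binning helpers onto the new `PixArtImageProcessor`, this accounts for most of the +65/-75 churn reported for this file in the listing above.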