diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -25,6 +25,7 @@ from transformers import (
|
|
25
25
|
CLIPVisionModelWithProjection,
|
26
26
|
)
|
27
27
|
|
28
|
+
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
28
29
|
from ...image_processor import PipelineImageInput, VaeImageProcessor
|
29
30
|
from ...loaders import (
|
30
31
|
FromSingleFileMixin,
|
@@ -124,6 +125,7 @@ def retrieve_timesteps(
|
|
124
125
|
num_inference_steps: Optional[int] = None,
|
125
126
|
device: Optional[Union[str, torch.device]] = None,
|
126
127
|
timesteps: Optional[List[int]] = None,
|
128
|
+
sigmas: Optional[List[float]] = None,
|
127
129
|
**kwargs,
|
128
130
|
):
|
129
131
|
"""
|
@@ -134,19 +136,23 @@ def retrieve_timesteps(
|
|
134
136
|
scheduler (`SchedulerMixin`):
|
135
137
|
The scheduler to get timesteps from.
|
136
138
|
num_inference_steps (`int`):
|
137
|
-
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
138
|
-
|
139
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
|
140
|
+
must be `None`.
|
139
141
|
device (`str` or `torch.device`, *optional*):
|
140
142
|
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
141
143
|
timesteps (`List[int]`, *optional*):
|
142
|
-
|
143
|
-
|
144
|
-
|
144
|
+
Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
|
145
|
+
`num_inference_steps` and `sigmas` must be `None`.
|
146
|
+
sigmas (`List[float]`, *optional*):
|
147
|
+
Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
|
148
|
+
`num_inference_steps` and `timesteps` must be `None`.
|
145
149
|
|
146
150
|
Returns:
|
147
151
|
`Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
|
148
152
|
second element is the number of inference steps.
|
149
153
|
"""
|
154
|
+
if timesteps is not None and sigmas is not None:
|
155
|
+
raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
|
150
156
|
if timesteps is not None:
|
151
157
|
accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
152
158
|
if not accepts_timesteps:
|
@@ -157,6 +163,16 @@ def retrieve_timesteps(
|
|
157
163
|
scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
|
158
164
|
timesteps = scheduler.timesteps
|
159
165
|
num_inference_steps = len(timesteps)
|
166
|
+
elif sigmas is not None:
|
167
|
+
accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
168
|
+
if not accept_sigmas:
|
169
|
+
raise ValueError(
|
170
|
+
f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
|
171
|
+
f" sigmas schedules. Please check whether you are using the correct scheduler."
|
172
|
+
)
|
173
|
+
scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
|
174
|
+
timesteps = scheduler.timesteps
|
175
|
+
num_inference_steps = len(timesteps)
|
160
176
|
else:
|
161
177
|
scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
|
162
178
|
timesteps = scheduler.timesteps
|
@@ -288,10 +304,10 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
288
304
|
do_classifier_free_guidance: bool = True,
|
289
305
|
negative_prompt: Optional[str] = None,
|
290
306
|
negative_prompt_2: Optional[str] = None,
|
291
|
-
prompt_embeds: Optional[torch.
|
292
|
-
negative_prompt_embeds: Optional[torch.
|
293
|
-
pooled_prompt_embeds: Optional[torch.
|
294
|
-
negative_pooled_prompt_embeds: Optional[torch.
|
307
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
308
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
309
|
+
pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
310
|
+
negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
295
311
|
lora_scale: Optional[float] = None,
|
296
312
|
clip_skip: Optional[int] = None,
|
297
313
|
):
|
@@ -317,17 +333,17 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
317
333
|
negative_prompt_2 (`str` or `List[str]`, *optional*):
|
318
334
|
The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
|
319
335
|
`text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
|
320
|
-
prompt_embeds (`torch.
|
336
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
321
337
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
322
338
|
provided, text embeddings will be generated from `prompt` input argument.
|
323
|
-
negative_prompt_embeds (`torch.
|
339
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
324
340
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
325
341
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
326
342
|
argument.
|
327
|
-
pooled_prompt_embeds (`torch.
|
343
|
+
pooled_prompt_embeds (`torch.Tensor`, *optional*):
|
328
344
|
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
329
345
|
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
330
|
-
negative_pooled_prompt_embeds (`torch.
|
346
|
+
negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
|
331
347
|
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
332
348
|
weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
|
333
349
|
input argument.
|
@@ -647,7 +663,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
647
663
|
# because `num_inference_steps` might be even given that every timestep
|
648
664
|
# (except the highest one) is duplicated. If `num_inference_steps` is even it would
|
649
665
|
# mean that we cut the timesteps in the middle of the denoising step
|
650
|
-
# (between 1st and 2nd
|
666
|
+
# (between 1st and 2nd derivative) which leads to incorrect results. By adding 1
|
651
667
|
# we ensure that the denoising process always ends after the 2nd derivative step of the scheduler
|
652
668
|
num_inference_steps = num_inference_steps + 1
|
653
669
|
|
@@ -665,6 +681,12 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
665
681
|
f"`image` has to be of type `torch.Tensor`, `PIL.Image.Image` or list but is {type(image)}"
|
666
682
|
)
|
667
683
|
|
684
|
+
latents_mean = latents_std = None
|
685
|
+
if hasattr(self.vae.config, "latents_mean") and self.vae.config.latents_mean is not None:
|
686
|
+
latents_mean = torch.tensor(self.vae.config.latents_mean).view(1, 4, 1, 1)
|
687
|
+
if hasattr(self.vae.config, "latents_std") and self.vae.config.latents_std is not None:
|
688
|
+
latents_std = torch.tensor(self.vae.config.latents_std).view(1, 4, 1, 1)
|
689
|
+
|
668
690
|
# Offload text encoder if `enable_model_cpu_offload` was enabled
|
669
691
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
670
692
|
self.text_encoder_2.to("cpu")
|
@@ -702,7 +724,12 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
702
724
|
self.vae.to(dtype)
|
703
725
|
|
704
726
|
init_latents = init_latents.to(dtype)
|
705
|
-
|
727
|
+
if latents_mean is not None and latents_std is not None:
|
728
|
+
latents_mean = latents_mean.to(device=self.device, dtype=dtype)
|
729
|
+
latents_std = latents_std.to(device=self.device, dtype=dtype)
|
730
|
+
init_latents = (init_latents - latents_mean) * self.vae.config.scaling_factor / latents_std
|
731
|
+
else:
|
732
|
+
init_latents = self.vae.config.scaling_factor * init_latents
|
706
733
|
|
707
734
|
if batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] == 0:
|
708
735
|
# expand init_latents for batch_size
|
@@ -874,20 +901,22 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
874
901
|
self.vae.decoder.mid_block.to(dtype)
|
875
902
|
|
876
903
|
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
877
|
-
def get_guidance_scale_embedding(
|
904
|
+
def get_guidance_scale_embedding(
|
905
|
+
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
906
|
+
) -> torch.Tensor:
|
878
907
|
"""
|
879
908
|
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
880
909
|
|
881
910
|
Args:
|
882
|
-
|
883
|
-
|
911
|
+
w (`torch.Tensor`):
|
912
|
+
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
884
913
|
embedding_dim (`int`, *optional*, defaults to 512):
|
885
|
-
|
886
|
-
dtype:
|
887
|
-
|
914
|
+
Dimension of the embeddings to generate.
|
915
|
+
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
916
|
+
Data type of the generated embeddings.
|
888
917
|
|
889
918
|
Returns:
|
890
|
-
`torch.
|
919
|
+
`torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
891
920
|
"""
|
892
921
|
assert len(w.shape) == 1
|
893
922
|
w = w * 1000.0
|
@@ -951,6 +980,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
951
980
|
strength: float = 0.3,
|
952
981
|
num_inference_steps: int = 50,
|
953
982
|
timesteps: List[int] = None,
|
983
|
+
sigmas: List[float] = None,
|
954
984
|
denoising_start: Optional[float] = None,
|
955
985
|
denoising_end: Optional[float] = None,
|
956
986
|
guidance_scale: float = 5.0,
|
@@ -959,13 +989,13 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
959
989
|
num_images_per_prompt: Optional[int] = 1,
|
960
990
|
eta: float = 0.0,
|
961
991
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
962
|
-
latents: Optional[torch.
|
963
|
-
prompt_embeds: Optional[torch.
|
964
|
-
negative_prompt_embeds: Optional[torch.
|
965
|
-
pooled_prompt_embeds: Optional[torch.
|
966
|
-
negative_pooled_prompt_embeds: Optional[torch.
|
992
|
+
latents: Optional[torch.Tensor] = None,
|
993
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
994
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
995
|
+
pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
996
|
+
negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
967
997
|
ip_adapter_image: Optional[PipelineImageInput] = None,
|
968
|
-
ip_adapter_image_embeds: Optional[List[torch.
|
998
|
+
ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
|
969
999
|
output_type: Optional[str] = "pil",
|
970
1000
|
return_dict: bool = True,
|
971
1001
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
@@ -979,7 +1009,9 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
979
1009
|
aesthetic_score: float = 6.0,
|
980
1010
|
negative_aesthetic_score: float = 2.5,
|
981
1011
|
clip_skip: Optional[int] = None,
|
982
|
-
callback_on_step_end: Optional[
|
1012
|
+
callback_on_step_end: Optional[
|
1013
|
+
Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
|
1014
|
+
] = None,
|
983
1015
|
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
|
984
1016
|
**kwargs,
|
985
1017
|
):
|
@@ -993,7 +1025,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
993
1025
|
prompt_2 (`str` or `List[str]`, *optional*):
|
994
1026
|
The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
|
995
1027
|
used in both text-encoders
|
996
|
-
image (`torch.
|
1028
|
+
image (`torch.Tensor` or `PIL.Image.Image` or `np.ndarray` or `List[torch.Tensor]` or `List[PIL.Image.Image]` or `List[np.ndarray]`):
|
997
1029
|
The image(s) to modify with the pipeline.
|
998
1030
|
strength (`float`, *optional*, defaults to 0.3):
|
999
1031
|
Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
|
@@ -1009,6 +1041,10 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1009
1041
|
Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
|
1010
1042
|
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
1011
1043
|
passed will be used. Must be in descending order.
|
1044
|
+
sigmas (`List[float]`, *optional*):
|
1045
|
+
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
|
1046
|
+
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
1047
|
+
will be used.
|
1012
1048
|
denoising_start (`float`, *optional*):
|
1013
1049
|
When specified, indicates the fraction (between 0.0 and 1.0) of the total denoising process to be
|
1014
1050
|
bypassed before it is initiated. Consequently, the initial part of the denoising process is skipped and
|
@@ -1045,30 +1081,30 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1045
1081
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
1046
1082
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
1047
1083
|
to make generation deterministic.
|
1048
|
-
latents (`torch.
|
1084
|
+
latents (`torch.Tensor`, *optional*):
|
1049
1085
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
1050
1086
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
1051
1087
|
tensor will be generated by sampling using the supplied random `generator`.
|
1052
|
-
prompt_embeds (`torch.
|
1088
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
1053
1089
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
1054
1090
|
provided, text embeddings will be generated from `prompt` input argument.
|
1055
|
-
negative_prompt_embeds (`torch.
|
1091
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
1056
1092
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
1057
1093
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
1058
1094
|
argument.
|
1059
|
-
pooled_prompt_embeds (`torch.
|
1095
|
+
pooled_prompt_embeds (`torch.Tensor`, *optional*):
|
1060
1096
|
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
1061
1097
|
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
1062
|
-
negative_pooled_prompt_embeds (`torch.
|
1098
|
+
negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
|
1063
1099
|
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
1064
1100
|
weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
|
1065
1101
|
input argument.
|
1066
1102
|
ip_adapter_image (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
|
1067
|
-
ip_adapter_image_embeds (`List[torch.
|
1068
|
-
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
1069
|
-
Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
1070
|
-
if `do_classifier_free_guidance` is set to `True`.
|
1071
|
-
|
1103
|
+
ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
|
1104
|
+
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
1105
|
+
IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
1106
|
+
contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
|
1107
|
+
provided, embeddings are computed from the `ip_adapter_image` input argument.
|
1072
1108
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
1073
1109
|
The output format of the generated image. Choose between
|
1074
1110
|
[PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
|
@@ -1124,11 +1160,11 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1124
1160
|
clip_skip (`int`, *optional*):
|
1125
1161
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
1126
1162
|
the output of the pre-final layer will be used for computing the prompt embeddings.
|
1127
|
-
callback_on_step_end (`Callable`, *optional*):
|
1128
|
-
A function
|
1129
|
-
with the following arguments: `callback_on_step_end(self:
|
1130
|
-
callback_kwargs: Dict)`. `callback_kwargs` will include a
|
1131
|
-
`callback_on_step_end_tensor_inputs`.
|
1163
|
+
callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
|
1164
|
+
A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
|
1165
|
+
each denoising step during inference, with the following arguments: `callback_on_step_end(self:
|
1166
|
+
DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
|
1167
|
+
list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
|
1132
1168
|
callback_on_step_end_tensor_inputs (`List`, *optional*):
|
1133
1169
|
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
|
1134
1170
|
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
|
@@ -1158,6 +1194,9 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1158
1194
|
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
|
1159
1195
|
)
|
1160
1196
|
|
1197
|
+
if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
|
1198
|
+
callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
|
1199
|
+
|
1161
1200
|
# 1. Check inputs. Raise error if not correct
|
1162
1201
|
self.check_inputs(
|
1163
1202
|
prompt,
|
@@ -1224,7 +1263,9 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1224
1263
|
def denoising_value_valid(dnv):
|
1225
1264
|
return isinstance(dnv, float) and 0 < dnv < 1
|
1226
1265
|
|
1227
|
-
timesteps, num_inference_steps = retrieve_timesteps(
|
1266
|
+
timesteps, num_inference_steps = retrieve_timesteps(
|
1267
|
+
self.scheduler, num_inference_steps, device, timesteps, sigmas
|
1268
|
+
)
|
1228
1269
|
timesteps, num_inference_steps = self.get_timesteps(
|
1229
1270
|
num_inference_steps,
|
1230
1271
|
strength,
|
@@ -1234,17 +1275,19 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1234
1275
|
latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
|
1235
1276
|
|
1236
1277
|
add_noise = True if self.denoising_start is None else False
|
1278
|
+
|
1237
1279
|
# 6. Prepare latent variables
|
1238
|
-
latents
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1280
|
+
if latents is None:
|
1281
|
+
latents = self.prepare_latents(
|
1282
|
+
image,
|
1283
|
+
latent_timestep,
|
1284
|
+
batch_size,
|
1285
|
+
num_images_per_prompt,
|
1286
|
+
prompt_embeds.dtype,
|
1287
|
+
device,
|
1288
|
+
generator,
|
1289
|
+
add_noise,
|
1290
|
+
)
|
1248
1291
|
# 7. Prepare extra step kwargs.
|
1249
1292
|
extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
|
1250
1293
|
|
@@ -1368,7 +1411,12 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1368
1411
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1369
1412
|
|
1370
1413
|
# compute the previous noisy sample x_t -> x_t-1
|
1414
|
+
latents_dtype = latents.dtype
|
1371
1415
|
latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
|
1416
|
+
if latents.dtype != latents_dtype:
|
1417
|
+
if torch.backends.mps.is_available():
|
1418
|
+
# some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
|
1419
|
+
latents = latents.to(latents_dtype)
|
1372
1420
|
|
1373
1421
|
if callback_on_step_end is not None:
|
1374
1422
|
callback_kwargs = {}
|
@@ -1403,6 +1451,10 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1403
1451
|
if needs_upcasting:
|
1404
1452
|
self.upcast_vae()
|
1405
1453
|
latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
|
1454
|
+
elif latents.dtype != self.vae.dtype:
|
1455
|
+
if torch.backends.mps.is_available():
|
1456
|
+
# some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
|
1457
|
+
self.vae = self.vae.to(latents.dtype)
|
1406
1458
|
|
1407
1459
|
# unscale/denormalize the latents
|
1408
1460
|
# denormalize with the mean and std if available and not None
|