diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -21,6 +21,7 @@ import torch
|
|
21
21
|
from packaging import version
|
22
22
|
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
|
23
23
|
|
24
|
+
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
24
25
|
from ...configuration_utils import FrozenDict
|
25
26
|
from ...image_processor import PipelineImageInput, VaeImageProcessor
|
26
27
|
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
|
@@ -115,6 +116,7 @@ def retrieve_timesteps(
|
|
115
116
|
num_inference_steps: Optional[int] = None,
|
116
117
|
device: Optional[Union[str, torch.device]] = None,
|
117
118
|
timesteps: Optional[List[int]] = None,
|
119
|
+
sigmas: Optional[List[float]] = None,
|
118
120
|
**kwargs,
|
119
121
|
):
|
120
122
|
"""
|
@@ -125,19 +127,23 @@ def retrieve_timesteps(
|
|
125
127
|
scheduler (`SchedulerMixin`):
|
126
128
|
The scheduler to get timesteps from.
|
127
129
|
num_inference_steps (`int`):
|
128
|
-
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
129
|
-
|
130
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
|
131
|
+
must be `None`.
|
130
132
|
device (`str` or `torch.device`, *optional*):
|
131
133
|
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
132
134
|
timesteps (`List[int]`, *optional*):
|
133
|
-
|
134
|
-
|
135
|
-
|
135
|
+
Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
|
136
|
+
`num_inference_steps` and `sigmas` must be `None`.
|
137
|
+
sigmas (`List[float]`, *optional*):
|
138
|
+
Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
|
139
|
+
`num_inference_steps` and `timesteps` must be `None`.
|
136
140
|
|
137
141
|
Returns:
|
138
142
|
`Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
|
139
143
|
second element is the number of inference steps.
|
140
144
|
"""
|
145
|
+
if timesteps is not None and sigmas is not None:
|
146
|
+
raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
|
141
147
|
if timesteps is not None:
|
142
148
|
accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
143
149
|
if not accepts_timesteps:
|
@@ -148,6 +154,16 @@ def retrieve_timesteps(
|
|
148
154
|
scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
|
149
155
|
timesteps = scheduler.timesteps
|
150
156
|
num_inference_steps = len(timesteps)
|
157
|
+
elif sigmas is not None:
|
158
|
+
accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
159
|
+
if not accept_sigmas:
|
160
|
+
raise ValueError(
|
161
|
+
f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
|
162
|
+
f" sigmas schedules. Please check whether you are using the correct scheduler."
|
163
|
+
)
|
164
|
+
scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
|
165
|
+
timesteps = scheduler.timesteps
|
166
|
+
num_inference_steps = len(timesteps)
|
151
167
|
else:
|
152
168
|
scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
|
153
169
|
timesteps = scheduler.timesteps
|
@@ -300,8 +316,8 @@ class StableDiffusionImg2ImgPipeline(
|
|
300
316
|
num_images_per_prompt,
|
301
317
|
do_classifier_free_guidance,
|
302
318
|
negative_prompt=None,
|
303
|
-
prompt_embeds: Optional[torch.
|
304
|
-
negative_prompt_embeds: Optional[torch.
|
319
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
320
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
305
321
|
lora_scale: Optional[float] = None,
|
306
322
|
**kwargs,
|
307
323
|
):
|
@@ -333,8 +349,8 @@ class StableDiffusionImg2ImgPipeline(
|
|
333
349
|
num_images_per_prompt,
|
334
350
|
do_classifier_free_guidance,
|
335
351
|
negative_prompt=None,
|
336
|
-
prompt_embeds: Optional[torch.
|
337
|
-
negative_prompt_embeds: Optional[torch.
|
352
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
353
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
338
354
|
lora_scale: Optional[float] = None,
|
339
355
|
clip_skip: Optional[int] = None,
|
340
356
|
):
|
@@ -354,10 +370,10 @@ class StableDiffusionImg2ImgPipeline(
|
|
354
370
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
355
371
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
356
372
|
less than `1`).
|
357
|
-
prompt_embeds (`torch.
|
373
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
358
374
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
359
375
|
provided, text embeddings will be generated from `prompt` input argument.
|
360
|
-
negative_prompt_embeds (`torch.
|
376
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
361
377
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
362
378
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
363
379
|
argument.
|
@@ -767,20 +783,22 @@ class StableDiffusionImg2ImgPipeline(
|
|
767
783
|
return latents
|
768
784
|
|
769
785
|
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
770
|
-
def get_guidance_scale_embedding(
|
786
|
+
def get_guidance_scale_embedding(
|
787
|
+
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
788
|
+
) -> torch.Tensor:
|
771
789
|
"""
|
772
790
|
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
773
791
|
|
774
792
|
Args:
|
775
|
-
|
776
|
-
|
793
|
+
w (`torch.Tensor`):
|
794
|
+
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
777
795
|
embedding_dim (`int`, *optional*, defaults to 512):
|
778
|
-
|
779
|
-
dtype:
|
780
|
-
|
796
|
+
Dimension of the embeddings to generate.
|
797
|
+
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
798
|
+
Data type of the generated embeddings.
|
781
799
|
|
782
800
|
Returns:
|
783
|
-
`torch.
|
801
|
+
`torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
784
802
|
"""
|
785
803
|
assert len(w.shape) == 1
|
786
804
|
w = w * 1000.0
|
@@ -831,20 +849,23 @@ class StableDiffusionImg2ImgPipeline(
|
|
831
849
|
strength: float = 0.8,
|
832
850
|
num_inference_steps: Optional[int] = 50,
|
833
851
|
timesteps: List[int] = None,
|
852
|
+
sigmas: List[float] = None,
|
834
853
|
guidance_scale: Optional[float] = 7.5,
|
835
854
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
836
855
|
num_images_per_prompt: Optional[int] = 1,
|
837
856
|
eta: Optional[float] = 0.0,
|
838
857
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
839
|
-
prompt_embeds: Optional[torch.
|
840
|
-
negative_prompt_embeds: Optional[torch.
|
858
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
859
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
841
860
|
ip_adapter_image: Optional[PipelineImageInput] = None,
|
842
|
-
ip_adapter_image_embeds: Optional[List[torch.
|
861
|
+
ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
|
843
862
|
output_type: Optional[str] = "pil",
|
844
863
|
return_dict: bool = True,
|
845
864
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
846
865
|
clip_skip: int = None,
|
847
|
-
callback_on_step_end: Optional[
|
866
|
+
callback_on_step_end: Optional[
|
867
|
+
Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
|
868
|
+
] = None,
|
848
869
|
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
|
849
870
|
**kwargs,
|
850
871
|
):
|
@@ -854,7 +875,7 @@ class StableDiffusionImg2ImgPipeline(
|
|
854
875
|
Args:
|
855
876
|
prompt (`str` or `List[str]`, *optional*):
|
856
877
|
The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
|
857
|
-
image (`torch.
|
878
|
+
image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
858
879
|
`Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
|
859
880
|
numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
|
860
881
|
or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
|
@@ -873,6 +894,10 @@ class StableDiffusionImg2ImgPipeline(
|
|
873
894
|
Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
|
874
895
|
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
875
896
|
passed will be used. Must be in descending order.
|
897
|
+
sigmas (`List[float]`, *optional*):
|
898
|
+
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
|
899
|
+
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
900
|
+
will be used.
|
876
901
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
877
902
|
A higher guidance scale value encourages the model to generate images closely linked to the text
|
878
903
|
`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
|
@@ -887,18 +912,18 @@ class StableDiffusionImg2ImgPipeline(
|
|
887
912
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
888
913
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
889
914
|
generation deterministic.
|
890
|
-
prompt_embeds (`torch.
|
915
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
891
916
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
892
917
|
provided, text embeddings are generated from the `prompt` input argument.
|
893
|
-
negative_prompt_embeds (`torch.
|
918
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
894
919
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
895
920
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
896
921
|
ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
|
897
|
-
ip_adapter_image_embeds (`List[torch.
|
898
|
-
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
899
|
-
Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
900
|
-
if `do_classifier_free_guidance` is set to `True`.
|
901
|
-
|
922
|
+
ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
|
923
|
+
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
924
|
+
IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
925
|
+
contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
|
926
|
+
provided, embeddings are computed from the `ip_adapter_image` input argument.
|
902
927
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
903
928
|
The output format of the generated image. Choose between `PIL.Image` or `np.array`.
|
904
929
|
return_dict (`bool`, *optional*, defaults to `True`):
|
@@ -910,11 +935,11 @@ class StableDiffusionImg2ImgPipeline(
|
|
910
935
|
clip_skip (`int`, *optional*):
|
911
936
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
912
937
|
the output of the pre-final layer will be used for computing the prompt embeddings.
|
913
|
-
callback_on_step_end (`Callable`, *optional*):
|
914
|
-
A function
|
915
|
-
with the following arguments: `callback_on_step_end(self:
|
916
|
-
callback_kwargs: Dict)`. `callback_kwargs` will include a
|
917
|
-
`callback_on_step_end_tensor_inputs`.
|
938
|
+
callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
|
939
|
+
A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
|
940
|
+
each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
|
941
|
+
DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
|
942
|
+
list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
|
918
943
|
callback_on_step_end_tensor_inputs (`List`, *optional*):
|
919
944
|
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
|
920
945
|
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
|
@@ -945,6 +970,9 @@ class StableDiffusionImg2ImgPipeline(
|
|
945
970
|
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
|
946
971
|
)
|
947
972
|
|
973
|
+
if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
|
974
|
+
callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
|
975
|
+
|
948
976
|
# 1. Check inputs. Raise error if not correct
|
949
977
|
self.check_inputs(
|
950
978
|
prompt,
|
@@ -1007,7 +1035,9 @@ class StableDiffusionImg2ImgPipeline(
|
|
1007
1035
|
image = self.image_processor.preprocess(image)
|
1008
1036
|
|
1009
1037
|
# 5. set timesteps
|
1010
|
-
timesteps, num_inference_steps = retrieve_timesteps(
|
1038
|
+
timesteps, num_inference_steps = retrieve_timesteps(
|
1039
|
+
self.scheduler, num_inference_steps, device, timesteps, sigmas
|
1040
|
+
)
|
1011
1041
|
timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
|
1012
1042
|
latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
|
1013
1043
|
|
@@ -21,6 +21,7 @@ import torch
|
|
21
21
|
from packaging import version
|
22
22
|
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
|
23
23
|
|
24
|
+
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
24
25
|
from ...configuration_utils import FrozenDict
|
25
26
|
from ...image_processor import PipelineImageInput, VaeImageProcessor
|
26
27
|
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
|
@@ -179,6 +180,7 @@ def retrieve_timesteps(
|
|
179
180
|
num_inference_steps: Optional[int] = None,
|
180
181
|
device: Optional[Union[str, torch.device]] = None,
|
181
182
|
timesteps: Optional[List[int]] = None,
|
183
|
+
sigmas: Optional[List[float]] = None,
|
182
184
|
**kwargs,
|
183
185
|
):
|
184
186
|
"""
|
@@ -189,19 +191,23 @@ def retrieve_timesteps(
|
|
189
191
|
scheduler (`SchedulerMixin`):
|
190
192
|
The scheduler to get timesteps from.
|
191
193
|
num_inference_steps (`int`):
|
192
|
-
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
193
|
-
|
194
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
|
195
|
+
must be `None`.
|
194
196
|
device (`str` or `torch.device`, *optional*):
|
195
197
|
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
196
198
|
timesteps (`List[int]`, *optional*):
|
197
|
-
|
198
|
-
|
199
|
-
|
199
|
+
Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
|
200
|
+
`num_inference_steps` and `sigmas` must be `None`.
|
201
|
+
sigmas (`List[float]`, *optional*):
|
202
|
+
Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
|
203
|
+
`num_inference_steps` and `timesteps` must be `None`.
|
200
204
|
|
201
205
|
Returns:
|
202
206
|
`Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
|
203
207
|
second element is the number of inference steps.
|
204
208
|
"""
|
209
|
+
if timesteps is not None and sigmas is not None:
|
210
|
+
raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
|
205
211
|
if timesteps is not None:
|
206
212
|
accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
207
213
|
if not accepts_timesteps:
|
@@ -212,6 +218,16 @@ def retrieve_timesteps(
|
|
212
218
|
scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
|
213
219
|
timesteps = scheduler.timesteps
|
214
220
|
num_inference_steps = len(timesteps)
|
221
|
+
elif sigmas is not None:
|
222
|
+
accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
223
|
+
if not accept_sigmas:
|
224
|
+
raise ValueError(
|
225
|
+
f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
|
226
|
+
f" sigmas schedules. Please check whether you are using the correct scheduler."
|
227
|
+
)
|
228
|
+
scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
|
229
|
+
timesteps = scheduler.timesteps
|
230
|
+
num_inference_steps = len(timesteps)
|
215
231
|
else:
|
216
232
|
scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
|
217
233
|
timesteps = scheduler.timesteps
|
@@ -372,8 +388,8 @@ class StableDiffusionInpaintPipeline(
|
|
372
388
|
num_images_per_prompt,
|
373
389
|
do_classifier_free_guidance,
|
374
390
|
negative_prompt=None,
|
375
|
-
prompt_embeds: Optional[torch.
|
376
|
-
negative_prompt_embeds: Optional[torch.
|
391
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
392
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
377
393
|
lora_scale: Optional[float] = None,
|
378
394
|
**kwargs,
|
379
395
|
):
|
@@ -405,8 +421,8 @@ class StableDiffusionInpaintPipeline(
|
|
405
421
|
num_images_per_prompt,
|
406
422
|
do_classifier_free_guidance,
|
407
423
|
negative_prompt=None,
|
408
|
-
prompt_embeds: Optional[torch.
|
409
|
-
negative_prompt_embeds: Optional[torch.
|
424
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
425
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
410
426
|
lora_scale: Optional[float] = None,
|
411
427
|
clip_skip: Optional[int] = None,
|
412
428
|
):
|
@@ -426,10 +442,10 @@ class StableDiffusionInpaintPipeline(
|
|
426
442
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
427
443
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
428
444
|
less than `1`).
|
429
|
-
prompt_embeds (`torch.
|
445
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
430
446
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
431
447
|
provided, text embeddings will be generated from `prompt` input argument.
|
432
|
-
negative_prompt_embeds (`torch.
|
448
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
433
449
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
434
450
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
435
451
|
argument.
|
@@ -795,7 +811,12 @@ class StableDiffusionInpaintPipeline(
|
|
795
811
|
return_noise=False,
|
796
812
|
return_image_latents=False,
|
797
813
|
):
|
798
|
-
shape = (
|
814
|
+
shape = (
|
815
|
+
batch_size,
|
816
|
+
num_channels_latents,
|
817
|
+
int(height) // self.vae_scale_factor,
|
818
|
+
int(width) // self.vae_scale_factor,
|
819
|
+
)
|
799
820
|
if isinstance(generator, list) and len(generator) != batch_size:
|
800
821
|
raise ValueError(
|
801
822
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -909,20 +930,22 @@ class StableDiffusionInpaintPipeline(
|
|
909
930
|
return timesteps, num_inference_steps - t_start
|
910
931
|
|
911
932
|
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
912
|
-
def get_guidance_scale_embedding(
|
933
|
+
def get_guidance_scale_embedding(
|
934
|
+
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
935
|
+
) -> torch.Tensor:
|
913
936
|
"""
|
914
937
|
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
915
938
|
|
916
939
|
Args:
|
917
|
-
|
918
|
-
|
940
|
+
w (`torch.Tensor`):
|
941
|
+
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
919
942
|
embedding_dim (`int`, *optional*, defaults to 512):
|
920
|
-
|
921
|
-
dtype:
|
922
|
-
|
943
|
+
Dimension of the embeddings to generate.
|
944
|
+
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
945
|
+
Data type of the generated embeddings.
|
923
946
|
|
924
947
|
Returns:
|
925
|
-
`torch.
|
948
|
+
`torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
926
949
|
"""
|
927
950
|
assert len(w.shape) == 1
|
928
951
|
w = w * 1000.0
|
@@ -970,28 +993,31 @@ class StableDiffusionInpaintPipeline(
|
|
970
993
|
prompt: Union[str, List[str]] = None,
|
971
994
|
image: PipelineImageInput = None,
|
972
995
|
mask_image: PipelineImageInput = None,
|
973
|
-
masked_image_latents: torch.
|
996
|
+
masked_image_latents: torch.Tensor = None,
|
974
997
|
height: Optional[int] = None,
|
975
998
|
width: Optional[int] = None,
|
976
999
|
padding_mask_crop: Optional[int] = None,
|
977
1000
|
strength: float = 1.0,
|
978
1001
|
num_inference_steps: int = 50,
|
979
1002
|
timesteps: List[int] = None,
|
1003
|
+
sigmas: List[float] = None,
|
980
1004
|
guidance_scale: float = 7.5,
|
981
1005
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
982
1006
|
num_images_per_prompt: Optional[int] = 1,
|
983
1007
|
eta: float = 0.0,
|
984
1008
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
985
|
-
latents: Optional[torch.
|
986
|
-
prompt_embeds: Optional[torch.
|
987
|
-
negative_prompt_embeds: Optional[torch.
|
1009
|
+
latents: Optional[torch.Tensor] = None,
|
1010
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
1011
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
988
1012
|
ip_adapter_image: Optional[PipelineImageInput] = None,
|
989
|
-
ip_adapter_image_embeds: Optional[List[torch.
|
1013
|
+
ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
|
990
1014
|
output_type: Optional[str] = "pil",
|
991
1015
|
return_dict: bool = True,
|
992
1016
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
993
1017
|
clip_skip: int = None,
|
994
|
-
callback_on_step_end: Optional[
|
1018
|
+
callback_on_step_end: Optional[
|
1019
|
+
Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
|
1020
|
+
] = None,
|
995
1021
|
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
|
996
1022
|
**kwargs,
|
997
1023
|
):
|
@@ -1001,14 +1027,14 @@ class StableDiffusionInpaintPipeline(
|
|
1001
1027
|
Args:
|
1002
1028
|
prompt (`str` or `List[str]`, *optional*):
|
1003
1029
|
The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
|
1004
|
-
image (`torch.
|
1030
|
+
image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
1005
1031
|
`Image`, numpy array or tensor representing an image batch to be inpainted (which parts of the image to
|
1006
1032
|
be masked out with `mask_image` and repainted according to `prompt`). For both numpy array and pytorch
|
1007
1033
|
tensor, the expected value range is between `[0, 1]`. If it's a tensor or a list of tensors, the
|
1008
1034
|
expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a list of arrays, the
|
1009
1035
|
expected shape should be `(B, H, W, C)` or `(H, W, C)`. It can also accept image latents as `image`, but
|
1010
1036
|
if passing latents directly it is not encoded again.
|
1011
|
-
mask_image (`torch.
|
1037
|
+
mask_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
1012
1038
|
`Image`, numpy array or tensor representing an image batch to mask `image`. White pixels in the mask
|
1013
1039
|
are repainted while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
|
1014
1040
|
single channel (luminance) before use. If it's a numpy array or pytorch tensor, it should contain one
|
@@ -1020,11 +1046,12 @@ class StableDiffusionInpaintPipeline(
|
|
1020
1046
|
width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
|
1021
1047
|
The width in pixels of the generated image.
|
1022
1048
|
padding_mask_crop (`int`, *optional*, defaults to `None`):
|
1023
|
-
The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to
|
1024
|
-
`padding_mask_crop` is not `None`, it will first find a rectangular region
|
1025
|
-
contains all masked area, and then expand that area based
|
1026
|
-
|
1027
|
-
|
1049
|
+
The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to
|
1050
|
+
image and mask_image. If `padding_mask_crop` is not `None`, it will first find a rectangular region
|
1051
|
+
with the same aspect ratio as the image that contains all masked areas, and then expand that area based
|
1052
|
+
on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
|
1053
|
+
resizing to the original image size for inpainting. This is useful when the masked area is small while
|
1054
|
+
the image is large and contains information irrelevant for inpainting, such as background.
|
1028
1055
|
strength (`float`, *optional*, defaults to 1.0):
|
1029
1056
|
Indicates extent to transform the reference `image`. Must be between 0 and 1. `image` is used as a
|
1030
1057
|
starting point and more noise is added the higher the `strength`. The number of denoising steps depends
|
@@ -1038,6 +1065,10 @@ class StableDiffusionInpaintPipeline(
|
|
1038
1065
|
Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
|
1039
1066
|
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
1040
1067
|
passed will be used. Must be in descending order.
|
1068
|
+
sigmas (`List[float]`, *optional*):
|
1069
|
+
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
|
1070
|
+
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
1071
|
+
will be used.
|
1041
1072
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
1042
1073
|
A higher guidance scale value encourages the model to generate images closely linked to the text
|
1043
1074
|
`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
|
@@ -1052,22 +1083,22 @@ class StableDiffusionInpaintPipeline(
|
|
1052
1083
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
1053
1084
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
1054
1085
|
generation deterministic.
|
1055
|
-
latents (`torch.
|
1086
|
+
latents (`torch.Tensor`, *optional*):
|
1056
1087
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
1057
1088
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
1058
1089
|
tensor is generated by sampling using the supplied random `generator`.
|
1059
|
-
prompt_embeds (`torch.
|
1090
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
1060
1091
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
1061
1092
|
provided, text embeddings are generated from the `prompt` input argument.
|
1062
|
-
negative_prompt_embeds (`torch.
|
1093
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
1063
1094
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
1064
1095
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
1065
1096
|
ip_adapter_image (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
|
1066
|
-
ip_adapter_image_embeds (`List[torch.
|
1067
|
-
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
1068
|
-
Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
1069
|
-
if `do_classifier_free_guidance` is set to `True`.
|
1070
|
-
|
1097
|
+
ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
|
1098
|
+
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
1099
|
+
IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
1100
|
+
contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
|
1101
|
+
provided, embeddings are computed from the `ip_adapter_image` input argument.
|
1071
1102
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
1072
1103
|
The output format of the generated image. Choose between `PIL.Image` or `np.array`.
|
1073
1104
|
return_dict (`bool`, *optional*, defaults to `True`):
|
@@ -1079,11 +1110,11 @@ class StableDiffusionInpaintPipeline(
|
|
1079
1110
|
clip_skip (`int`, *optional*):
|
1080
1111
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
1081
1112
|
the output of the pre-final layer will be used for computing the prompt embeddings.
|
1082
|
-
callback_on_step_end (`Callable`, *optional*):
|
1083
|
-
A function
|
1084
|
-
with the following arguments: `callback_on_step_end(self:
|
1085
|
-
callback_kwargs: Dict)`. `callback_kwargs` will include a
|
1086
|
-
`callback_on_step_end_tensor_inputs`.
|
1113
|
+
callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
|
1114
|
+
A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
|
1115
|
+
each denoising step during the inference, with the following arguments: `callback_on_step_end(self:
|
1116
|
+
DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
|
1117
|
+
list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
|
1087
1118
|
callback_on_step_end_tensor_inputs (`List`, *optional*):
|
1088
1119
|
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
|
1089
1120
|
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
|
@@ -1143,6 +1174,9 @@ class StableDiffusionInpaintPipeline(
|
|
1143
1174
|
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
|
1144
1175
|
)
|
1145
1176
|
|
1177
|
+
if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
|
1178
|
+
callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
|
1179
|
+
|
1146
1180
|
# 0. Default height and width to unet
|
1147
1181
|
height = height or self.unet.config.sample_size * self.vae_scale_factor
|
1148
1182
|
width = width or self.unet.config.sample_size * self.vae_scale_factor
|
@@ -1212,7 +1246,9 @@ class StableDiffusionInpaintPipeline(
|
|
1212
1246
|
)
|
1213
1247
|
|
1214
1248
|
# 4. set timesteps
|
1215
|
-
timesteps, num_inference_steps = retrieve_timesteps(
|
1249
|
+
timesteps, num_inference_steps = retrieve_timesteps(
|
1250
|
+
self.scheduler, num_inference_steps, device, timesteps, sigmas
|
1251
|
+
)
|
1216
1252
|
timesteps, num_inference_steps = self.get_timesteps(
|
1217
1253
|
num_inference_steps=num_inference_steps, strength=strength, device=device
|
1218
1254
|
)
|