diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
--- a/diffusers/pipelines/controlnet/pipeline_controlnet.py
+++ b/diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -22,6 +22,7 @@ import torch
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
+from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
@@ -97,6 +98,7 @@ def retrieve_timesteps(
     num_inference_steps: Optional[int] = None,
     device: Optional[Union[str, torch.device]] = None,
     timesteps: Optional[List[int]] = None,
+    sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
     """
@@ -107,19 +109,23 @@ def retrieve_timesteps(
         scheduler (`SchedulerMixin`):
             The scheduler to get timesteps from.
         num_inference_steps (`int`):
-            The number of diffusion steps used when generating samples with a pre-trained model. If used,
-            `timesteps` must be `None`.
+            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
+            must be `None`.
         device (`str` or `torch.device`, *optional*):
             The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         timesteps (`List[int]`, *optional*):
-            Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
-            timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
-            must be `None`.
+            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
+            `num_inference_steps` and `sigmas` must be `None`.
+        sigmas (`List[float]`, *optional*):
+            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
+            `num_inference_steps` and `timesteps` must be `None`.
 
     Returns:
         `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
         second element is the number of inference steps.
     """
+    if timesteps is not None and sigmas is not None:
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
     if timesteps is not None:
         accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
         if not accepts_timesteps:
@@ -130,6 +136,16 @@ def retrieve_timesteps(
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         timesteps = scheduler.timesteps
         num_inference_steps = len(timesteps)
+    elif sigmas is not None:
+        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
+        if not accept_sigmas:
+            raise ValueError(
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" sigmas schedules. Please check whether you are using the correct scheduler."
+            )
+        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
+        timesteps = scheduler.timesteps
+        num_inference_steps = len(timesteps)
     else:
         scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
         timesteps = scheduler.timesteps
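The new `sigmas` argument flows from the pipeline `__call__` through `retrieve_timesteps` into `scheduler.set_timesteps`, and errors out if the scheduler cannot accept a custom sigma schedule. A minimal sketch of calling the pipeline with it (the model IDs and sigma values below are illustrative assumptions, not part of this diff):

```python
import numpy as np
import torch
from PIL import Image
from diffusers import ControlNetModel, EulerDiscreteScheduler, StableDiffusionControlNetPipeline

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
).to("cuda")
# The scheduler's set_timesteps must accept a `sigmas` kwarg, or retrieve_timesteps raises.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)

canny_image = Image.new("RGB", (512, 512))  # placeholder conditioning image
sigmas = np.linspace(14.6, 0.0, 11).tolist()  # descending noise levels with the usual trailing 0.0
image = pipe("a photo of a cat", image=canny_image, sigmas=sigmas).images[0]
```

Passing both `timesteps` and `sigmas` triggers the `ValueError` added above.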
@@ -246,8 +262,8 @@ class StableDiffusionControlNetPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         **kwargs,
     ):
@@ -279,8 +295,8 @@ class StableDiffusionControlNetPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -300,10 +316,10 @@ class StableDiffusionControlNetPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -661,9 +677,9 @@ class StableDiffusionControlNetPipeline(
                 raise ValueError(
                     f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
                 )
-
-            for image_ in image:
-                self.check_image(image_, prompt, prompt_embeds)
+            else:
+                for image_ in image:
+                    self.check_image(image_, prompt, prompt_embeds)
         else:
             assert False
 
@@ -807,7 +823,12 @@ class StableDiffusionControlNetPipeline(
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
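The `int()` casts added to `prepare_latents` are defensive: a float `height` or `width` would survive Python's floor division and then break tensor creation. A quick illustration (not from the diff):

```python
import torch

vae_scale_factor = 8
height = 512.0                           # e.g. arrives as a float from upstream arithmetic
bad = height // vae_scale_factor         # 64.0 -- torch.randn((1, 4, bad, bad)) raises TypeError
good = int(height) // vae_scale_factor   # 64
latents = torch.randn((1, 4, good, good))  # shape (1, 4, 64, 64)
```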
@@ -824,20 +845,22 @@ class StableDiffusionControlNetPipeline(
         return latents
 
     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.Tensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
 
         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.
 
         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`.
+            `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
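For context, the method documented here embeds the guidance scale `w` with sinusoidal features, for UNets conditioned on guidance through `time_cond_proj_dim` (LCM-style models). A standalone sketch of the same computation, following the signature shown in this hunk:

```python
import torch

def guidance_scale_embedding(w: torch.Tensor, embedding_dim: int = 512) -> torch.Tensor:
    """Sinusoidal features of w * 1000, mirroring the method documented above."""
    assert w.ndim == 1
    w = w.float() * 1000.0
    half_dim = embedding_dim // 2
    freqs = torch.exp(-torch.log(torch.tensor(10000.0)) * torch.arange(half_dim) / (half_dim - 1))
    args = w[:, None] * freqs[None, :]
    emb = torch.cat([torch.sin(args), torch.cos(args)], dim=1)
    if embedding_dim % 2 == 1:  # zero-pad odd dimensions
        emb = torch.nn.functional.pad(emb, (0, 1))
    return emb  # shape (len(w), embedding_dim)

emb = guidance_scale_embedding(torch.tensor([7.5]), embedding_dim=256)
```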
@@ -885,16 +908,17 @@ class StableDiffusionControlNetPipeline(
         width: Optional[int] = None,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
+        sigmas: List[float] = None,
         guidance_scale: float = 7.5,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -903,7 +927,9 @@ class StableDiffusionControlNetPipeline(
         control_guidance_start: Union[float, List[float]] = 0.0,
         control_guidance_end: Union[float, List[float]] = 1.0,
         clip_skip: Optional[int] = None,
-        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+        callback_on_step_end: Optional[
+            Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
+        ] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         **kwargs,
     ):
@@ -913,16 +939,16 @@ class StableDiffusionControlNetPipeline(
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
-                    `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
+                    `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
-                specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
-                accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
-                and/or width are passed, `image` is resized accordingly. If multiple ControlNets are specified in
-                `init`, images must be passed as a list such that each element of the list can be correctly batched for
-                input to a single ControlNet. When `prompt` is a list, and if a list of images is passed for a single ControlNet,
-                each will be paired with each prompt in the `prompt` list. This also applies to multiple ControlNets,
-                where a list of image lists can be passed to batch for each prompt and each ControlNet.
+                specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
+                as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
+                width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
+                images must be passed as a list such that each element of the list can be correctly batched for input
+                to a single ControlNet. When `prompt` is a list, and if a list of images is passed for a single
+                ControlNet, each will be paired with each prompt in the `prompt` list. This also applies to multiple
+                ControlNets, where a list of image lists can be passed to batch for each prompt and each ControlNet.
             height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The height in pixels of the generated image.
             width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
@@ -934,6 +960,10 @@ class StableDiffusionControlNetPipeline(
                 Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
                 in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                 passed will be used. Must be in descending order.
+            sigmas (`List[float]`, *optional*):
+                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
+                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
+                will be used.
             guidance_scale (`float`, *optional*, defaults to 7.5):
                 A higher guidance scale value encourages the model to generate images closely linked to the text
                 `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
@@ -948,22 +978,22 @@ class StableDiffusionControlNetPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
@@ -971,7 +1001,7 @@ class StableDiffusionControlNetPipeline(
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
@@ -992,15 +1022,15 @@ class StableDiffusionControlNetPipeline(
             clip_skip (`int`, *optional*):
                 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                 the output of the pre-final layer will be used for computing the prompt embeddings.
-            callback_on_step_end (`Callable`, *optional*):
-                A function that calls at the end of each denoising steps during the inference. The function is called
-                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
-                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
-                `callback_on_step_end_tensor_inputs`.
+            callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
+                A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
+                each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
+                DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
+                list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
 
         Examples:
 
@@ -1028,6 +1058,9 @@ class StableDiffusionControlNetPipeline(
                 "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
             )
 
+        if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
+            callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
+
         controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
 
         # align format for control guidance
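The inserted `isinstance` branch is what lets a callback object carry its own tensor list: when a `PipelineCallback` or `MultiPipelineCallbacks` instance is passed, its `tensor_inputs` replaces `callback_on_step_end_tensor_inputs`. A sketch of both accepted styles, reusing `pipe` and `canny_image` from the earlier example; the `tensor_inputs`/`callback_fn` hook names follow the new `diffusers/callbacks.py` module listed above, and the guidance-cutoff logic itself is purely illustrative:

```python
from diffusers.callbacks import PipelineCallback

# Function style, matching the documented signature; must return callback_kwargs.
def log_latents(pipe, step, timestep, callback_kwargs):
    print(f"step {step}: latent std {callback_kwargs['latents'].std().item():.3f}")
    return callback_kwargs

# Class style: tensor_inputs overrides callback_on_step_end_tensor_inputs.
class CFGCutoff(PipelineCallback):
    tensor_inputs = ["latents"]

    def callback_fn(self, pipeline, step_index, timestep, callback_kwargs):
        # Illustrative: disable classifier-free guidance for the last 20% of steps.
        if step_index == int(0.8 * pipeline.num_timesteps):
            pipeline._guidance_scale = 1.0
        return callback_kwargs

image = pipe("a photo of a cat", image=canny_image, callback_on_step_end=CFGCutoff()).images[0]
```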
@@ -1155,7 +1188,9 @@ class StableDiffusionControlNetPipeline(
             assert False
 
         # 5. Prepare timesteps
-        timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
+        timesteps, num_inference_steps = retrieve_timesteps(
+            self.scheduler, num_inference_steps, device, timesteps, sigmas
+        )
         self._num_timesteps = len(timesteps)
 
         # 6. Prepare latent variables
--- a/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
+++ b/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
@@ -240,7 +240,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
         condtioning_image: PIL.Image.Image,
         source_subject_category: List[str],
         target_subject_category: List[str],
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         guidance_scale: float = 7.5,
         height: int = 512,
         width: int = 512,
@@ -266,7 +266,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
                 The source subject category.
             target_subject_category (`List[str]`):
                 The target subject category.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by random sampling.
--- a/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
+++ b/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -21,6 +21,7 @@ import torch
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
+from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
@@ -239,8 +240,8 @@ class StableDiffusionControlNetImg2ImgPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         **kwargs,
     ):
@@ -272,8 +273,8 @@ class StableDiffusionControlNetImg2ImgPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -293,10 +294,10 @@ class StableDiffusionControlNetImg2ImgPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -904,11 +905,11 @@ class StableDiffusionControlNetImg2ImgPipeline(
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -917,7 +918,9 @@ class StableDiffusionControlNetImg2ImgPipeline(
         control_guidance_start: Union[float, List[float]] = 0.0,
         control_guidance_end: Union[float, List[float]] = 1.0,
         clip_skip: Optional[int] = None,
-        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+        callback_on_step_end: Optional[
+            Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
+        ] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         **kwargs,
     ):
@@ -927,18 +930,18 @@ class StableDiffusionControlNetImg2ImgPipeline(
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
-                    `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
+                    `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The initial image to be used as the starting point for the image generation process. Can also accept
                 image latents as `image`, and if passing latents directly they are not encoded again.
-            control_image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
-                    `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
+            control_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
+                    `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
-                specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
-                accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
-                and/or width are passed, `image` is resized accordingly. If multiple ControlNets are specified in
-                `init`, images must be passed as a list such that each element of the list can be correctly batched for
-                input to a single ControlNet.
+                specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
+                as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
+                width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
+                images must be passed as a list such that each element of the list can be correctly batched for input
+                to a single ControlNet.
             height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The height in pixels of the generated image.
             width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
@@ -966,22 +969,22 @@ class StableDiffusionControlNetImg2ImgPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
@@ -1004,15 +1007,15 @@ class StableDiffusionControlNetImg2ImgPipeline(
             clip_skip (`int`, *optional*):
                 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                 the output of the pre-final layer will be used for computing the prompt embeddings.
-            callback_on_step_end (`Callable`, *optional*):
-                A function that calls at the end of each denoising steps during the inference. The function is called
-                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
-                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
-                `callback_on_step_end_tensor_inputs`.
+            callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
+                A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
+                each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
+                DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
+                list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
 
         Examples:
 
@@ -1040,6 +1043,9 @@ class StableDiffusionControlNetImg2ImgPipeline(
                 "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
             )
 
+        if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
+            callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
+
         controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
 
         # align format for control guidance
@@ -1169,15 +1175,16 @@ class StableDiffusionControlNetImg2ImgPipeline(
         self._num_timesteps = len(timesteps)
 
         # 6. Prepare latent variables
-        latents = self.prepare_latents(
-            image,
-            latent_timestep,
-            batch_size,
-            num_images_per_prompt,
-            prompt_embeds.dtype,
-            device,
-            generator,
-        )
+        if latents is None:
+            latents = self.prepare_latents(
+                image,
+                latent_timestep,
+                batch_size,
+                num_images_per_prompt,
+                prompt_embeds.dtype,
+                device,
+                generator,
+            )
 
         # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
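The last hunk is a behavior change, not just a docstring rename: the img2img pipeline previously always recomputed latents from `image`, whereas it now honors a user-supplied `latents` argument and skips `prepare_latents`. A sketch of what that enables (model IDs and shapes are illustrative assumptions; pre-supplied latents must already be the noised image latents expected at the first timestep):

```python
import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet
)

init_image = Image.new("RGB", (512, 512))   # placeholder input image
canny_image = Image.new("RGB", (512, 512))  # placeholder conditioning image

# (batch, unet.config.in_channels, height // 8, width // 8) for 512x512 SD 1.5.
latents = torch.randn((1, pipe.unet.config.in_channels, 64, 64))
# The same starting latents can now be replayed across prompts.
for prompt in ["a watercolor cat", "an ink-sketch cat"]:
    out = pipe(prompt, image=init_image, control_image=canny_image, latents=latents.clone())
```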