diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
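
The hunks below are representative excerpts, taken from `diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py` and `diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py`. They illustrate the two changes repeated across most pipelines in this release: the new `diffusers/callbacks.py` module (+156 lines), whose `PipelineCallback` and `MultiPipelineCallbacks` objects are now accepted by `callback_on_step_end`, and the codebase-wide switch of type annotations from `torch.FloatTensor` to `torch.Tensor`. A minimal sketch of a callback object compatible with the contract visible in the hunks; the `callback_fn` override point and the `num_timesteps` attribute are assumptions based on the 0.28.0 `diffusers.callbacks` module, which this diff adds but does not display:

```python
from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback


class CFGCutoffCallback(PipelineCallback):
    # The pipeline copies this list into `callback_on_step_end_tensor_inputs`
    # (see the `isinstance(...)` branches added in the hunks below).
    tensor_inputs = ["prompt_embeds"]

    def callback_fn(self, pipeline, step_index, timestep, callback_kwargs):
        # Illustrative policy: drop classifier-free guidance halfway through denoising.
        if step_index == int(pipeline.num_timesteps * 0.5):
            prompt_embeds = callback_kwargs["prompt_embeds"]
            callback_kwargs["prompt_embeds"] = prompt_embeds[-1:]  # keep the conditional half
            pipeline._guidance_scale = 0.0
        return callback_kwargs


# Usage sketch: a single callback object, or several combined.
# pipe(..., callback_on_step_end=CFGCutoffCallback())
# pipe(..., callback_on_step_end=MultiPipelineCallbacks([CFGCutoffCallback()]))
```
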
--- a/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
+++ b/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -23,6 +23,7 @@ import torch
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

+from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
@@ -364,8 +365,8 @@ class StableDiffusionControlNetInpaintPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         **kwargs,
     ):
@@ -397,8 +398,8 @@ class StableDiffusionControlNetInpaintPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -418,10 +419,10 @@ class StableDiffusionControlNetInpaintPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -972,7 +973,12 @@ class StableDiffusionControlNetInpaintPipeline(
         return_noise=False,
         return_image_latents=False,
     ):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
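
Besides the reflow, the new tuple wraps `height` and `width` in `int(...)`. A short illustration (not from the diff) of the failure mode this guards against when a float sneaks in from upstream arithmetic:

```python
import torch

vae_scale_factor = 8
height = width = 512.0  # floats can arrive from upstream division

# Without the cast, the shape entries stay floats and torch.randn raises:
# torch.randn((1, 4, height // vae_scale_factor, width // vae_scale_factor))  # TypeError
shape = (1, 4, int(height) // vae_scale_factor, int(width) // vae_scale_factor)
print(torch.randn(shape).shape)  # torch.Size([1, 4, 64, 64])
```
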
@@ -1116,11 +1122,11 @@ class StableDiffusionControlNetInpaintPipeline(
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
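
The `torch.FloatTensor` to `torch.Tensor` change repeated throughout these hunks is cosmetic for callers (annotations are not enforced), but the new annotation matches what the pipelines actually accept, for example half-precision embeddings:

```python
import torch

emb = torch.randn(1, 77, 768, dtype=torch.float16)
print(isinstance(emb, torch.FloatTensor))  # False: FloatTensor means float32 on CPU
print(isinstance(emb, torch.Tensor))       # True: the 0.28.0 annotations match reality
```
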
@@ -1129,7 +1135,9 @@ class StableDiffusionControlNetInpaintPipeline(
         control_guidance_start: Union[float, List[float]] = 0.0,
         control_guidance_end: Union[float, List[float]] = 1.0,
         clip_skip: Optional[int] = None,
-        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+        callback_on_step_end: Optional[
+            Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
+        ] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         **kwargs,
     ):
@@ -1139,14 +1147,14 @@ class StableDiffusionControlNetInpaintPipeline(
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`,
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`,
                 `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, NumPy array or tensor representing an image batch to be used as the starting point. For both
                 NumPy array and PyTorch tensor, the expected value range is between `[0, 1]`. If it's a tensor or a
                 list or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a NumPy array or
                 a list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)`. It can also accept image
                 latents as `image`, but if passing latents directly it is not encoded again.
-            mask_image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`,
+            mask_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`,
                 `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, NumPy array or tensor representing an image batch to mask `image`. White pixels in the mask
                 are repainted while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
@@ -1154,24 +1162,25 @@ class StableDiffusionControlNetInpaintPipeline(
                 color channel (L) instead of 3, so the expected shape for PyTorch tensor would be `(B, 1, H, W)`, `(B,
                 H, W)`, `(1, H, W)`, `(H, W)`. And for NumPy array, it would be for `(B, H, W, 1)`, `(B, H, W)`, `(H,
                 W, 1)`, or `(H, W)`.
-            control_image (`torch.FloatTensor`, `PIL.Image.Image`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`,
-                `List[List[torch.FloatTensor]]`, or `List[List[PIL.Image.Image]]`):
+            control_image (`torch.Tensor`, `PIL.Image.Image`, `List[torch.Tensor]`, `List[PIL.Image.Image]`,
+                `List[List[torch.Tensor]]`, or `List[List[PIL.Image.Image]]`):
                 The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
-                specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
-                accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
-                and/or width are passed, `image` is resized accordingly. If multiple ControlNets are specified in
-                `init`, images must be passed as a list such that each element of the list can be correctly batched for
-                input to a single ControlNet.
+                specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
+                as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
+                width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
+                images must be passed as a list such that each element of the list can be correctly batched for input
+                to a single ControlNet.
             height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The height in pixels of the generated image.
             width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The width in pixels of the generated image.
             padding_mask_crop (`int`, *optional*, defaults to `None`):
-                The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to
-                image and mask_image. If `padding_mask_crop` is not `None`, it will first find a rectangular region
-                with the same aspect ration of the image and contains all masked area, and then expand that area based
-                on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
-                resizing to the original image size for inpainting.
+                The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to
+                image and mask_image. If `padding_mask_crop` is not `None`, it will first find a rectangular region
+                with the same aspect ration of the image and contains all masked area, and then expand that area based
+                on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
+                resizing to the original image size for inpainting. This is useful when the masked area is small while
+                the image is large and contain information irrelevant for inpainting, such as background.
             strength (`float`, *optional*, defaults to 1.0):
                 Indicates extent to transform the reference `image`. Must be between 0 and 1. `image` is used as a
                 starting point and more noise is added the higher the `strength`. The number of denoising steps depends
@@ -1195,22 +1204,22 @@ class StableDiffusionControlNetInpaintPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
|
|
1233
1242
|
clip_skip (`int`, *optional*):
|
1234
1243
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
1235
1244
|
the output of the pre-final layer will be used for computing the prompt embeddings.
|
1236
|
-
callback_on_step_end (`Callable`, *optional*):
|
1237
|
-
A function
|
1238
|
-
with the following arguments: `callback_on_step_end(self:
|
1239
|
-
callback_kwargs: Dict)`. `callback_kwargs` will include a
|
1240
|
-
`callback_on_step_end_tensor_inputs`.
|
1245
|
+
callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
|
1246
|
+
A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
|
1247
|
+
each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
|
1248
|
+
DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
|
1249
|
+
list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
|
1241
1250
|
callback_on_step_end_tensor_inputs (`List`, *optional*):
|
1242
1251
|
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
|
1243
1252
|
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
|
1244
|
-
`._callback_tensor_inputs` attribute of your
|
1253
|
+
`._callback_tensor_inputs` attribute of your pipeline class.
|
1245
1254
|
|
1246
1255
|
Examples:
|
1247
1256
|
|
@@ -1269,6 +1278,9 @@ class StableDiffusionControlNetInpaintPipeline(
|
|
1269
1278
|
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
|
1270
1279
|
)
|
1271
1280
|
|
1281
|
+
if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
|
1282
|
+
callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
|
1283
|
+
|
1272
1284
|
controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
|
1273
1285
|
|
1274
1286
|
# align format for control guidance
|
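
The new branch above defers the tensor-input list to the callback object itself. A sketch of the aggregation `MultiPipelineCallbacks` is assumed to perform (the real class lives in the new `diffusers/callbacks.py`, which this diff adds but does not display):

```python
class MultiPipelineCallbacksSketch:
    """Assumed behavior of diffusers.callbacks.MultiPipelineCallbacks."""

    def __init__(self, callbacks):
        self.callbacks = callbacks

    @property
    def tensor_inputs(self):
        # Union of every child callback's requested tensors.
        return [name for cb in self.callbacks for name in cb.tensor_inputs]

    def __call__(self, pipeline, step_index, timestep, callback_kwargs):
        # Chain the callbacks, threading callback_kwargs through each one.
        for cb in self.callbacks:
            callback_kwargs = cb(pipeline, step_index, timestep, callback_kwargs)
        return callback_kwargs
```
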
--- a/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
+++ b/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
@@ -27,6 +27,7 @@ from transformers import (
     CLIPVisionModelWithProjection,
 )

+from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import (
     FromSingleFileMixin,
@@ -151,7 +152,12 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):


 class StableDiffusionXLControlNetInpaintPipeline(
-    DiffusionPipeline, StableDiffusionMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, IPAdapterMixin
+    DiffusionPipeline,
+    StableDiffusionMixin,
+    StableDiffusionXLLoraLoaderMixin,
+    FromSingleFileMixin,
+    IPAdapterMixin,
+    TextualInversionLoaderMixin,
 ):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion XL.
@@ -160,6 +166,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

     The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
         - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
         - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
         - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
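
With `TextualInversionLoaderMixin` now in the base-class list, the SDXL ControlNet inpaint pipeline gains `load_textual_inversion`. A usage sketch; the embedding repo id is a placeholder (SD-concepts embeddings target SD 1.x and are shown only for illustration):

```python
from diffusers import ControlNetModel, StableDiffusionXLControlNetInpaintPipeline

controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
pipe = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
)
pipe.load_textual_inversion("sd-concepts-library/cat-toy")  # hypothetical embedding repo
```
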
@@ -191,8 +198,26 @@ class StableDiffusionXLControlNetInpaintPipeline(
     """

     model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
-    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2", "image_encoder", "feature_extractor"]
-    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
+
+    _optional_components = [
+        "tokenizer",
+        "tokenizer_2",
+        "text_encoder",
+        "text_encoder_2",
+        "image_encoder",
+        "feature_extractor",
+    ]
+    _callback_tensor_inputs = [
+        "latents",
+        "prompt_embeds",
+        "negative_prompt_embeds",
+        "add_text_embeds",
+        "add_time_ids",
+        "negative_pooled_prompt_embeds",
+        "add_neg_time_ids",
+        "mask",
+        "masked_image_latents",
+    ]

     def __init__(
         self,
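
The expanded `_callback_tensor_inputs` list means step callbacks on this pipeline may now request the inpainting-specific tensors. A sketch using a plain function callback; `pipe.num_timesteps` is set by the pipeline during `__call__`:

```python
def inspect_mask(pipe, step, timestep, callback_kwargs):
    # Runs once near the end of denoising; purely illustrative.
    if step == int(pipe.num_timesteps * 0.8):
        print(callback_kwargs["mask"].shape, callback_kwargs["masked_image_latents"].shape)
    return callback_kwargs

# out = pipe(..., callback_on_step_end=inspect_mask,
#            callback_on_step_end_tensor_inputs=["mask", "masked_image_latents"])
```
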
@@ -202,7 +227,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
         tokenizer: CLIPTokenizer,
         tokenizer_2: CLIPTokenizer,
         unet: UNet2DConditionModel,
-        controlnet: ControlNetModel,
+        controlnet: Union[ControlNetModel, List[ControlNetModel], Tuple[ControlNetModel], MultiControlNetModel],
         scheduler: KarrasDiffusionSchedulers,
         requires_aesthetics_score: bool = False,
         force_zeros_for_empty_prompt: bool = True,
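
The widened `controlnet` annotation documents that a list or tuple of ControlNets is wrapped into a `MultiControlNetModel`. A usage sketch, with placeholder model ids:

```python
# controlnets = [
#     ControlNetModel.from_pretrained("<canny-controlnet>"),   # placeholder ids
#     ControlNetModel.from_pretrained("<depth-controlnet>"),
# ]
# pipe = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
#     "<sdxl-base>", controlnet=controlnets
# )
# out = pipe(prompt, image=init, mask_image=mask,
#            control_image=[canny_cond, depth_cond],
#            controlnet_conditioning_scale=[1.0, 0.5])
```
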
@@ -255,10 +280,10 @@ class StableDiffusionXLControlNetInpaintPipeline(
         do_classifier_free_guidance: bool = True,
         negative_prompt: Optional[str] = None,
         negative_prompt_2: Optional[str] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
+        pooled_prompt_embeds: Optional[torch.Tensor] = None,
+        negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -284,17 +309,17 @@ class StableDiffusionXLControlNetInpaintPipeline(
             negative_prompt_2 (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
                 `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
-            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+            pooled_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
                 If not provided, pooled text embeddings will be generated from `prompt` input argument.
-            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
                 input argument.
@@ -880,7 +905,12 @@ class StableDiffusionXLControlNetInpaintPipeline(
         return_noise=False,
         return_image_latents=False,
     ):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -1022,7 +1052,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
         # because `num_inference_steps` might be even given that every timestep
         # (except the highest one) is duplicated. If `num_inference_steps` is even it would
         # mean that we cut the timesteps in the middle of the denoising step
-        # (between 1st and 2nd derivate) which leads to incorrect results. By adding 1
+        # (between 1st and 2nd derivative) which leads to incorrect results. By adding 1
         # we ensure that the denoising process always ends after the 2nd derivate step of the scheduler
         num_inference_steps = num_inference_steps + 1

@@ -1146,13 +1176,13 @@ class StableDiffusionXLControlNetInpaintPipeline(
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
-        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
+        pooled_prompt_embeds: Optional[torch.Tensor] = None,
+        negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -1167,7 +1197,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
         aesthetic_score: float = 6.0,
         negative_aesthetic_score: float = 2.5,
         clip_skip: Optional[int] = None,
-        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+        callback_on_step_end: Optional[
+            Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
+        ] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         **kwargs,
     ):
@@ -1194,11 +1226,12 @@ class StableDiffusionXLControlNetInpaintPipeline(
             width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                 The width in pixels of the generated image.
             padding_mask_crop (`int`, *optional*, defaults to `None`):
-                The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to
-                image and mask_image. If `padding_mask_crop` is not `None`, it will first find a rectangular region
-                with the same aspect ration of the image and contains all masked area, and then expand that area based
-                on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
-                resizing to the original image size for inpainting.
+                The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to
+                image and mask_image. If `padding_mask_crop` is not `None`, it will first find a rectangular region
+                with the same aspect ration of the image and contains all masked area, and then expand that area based
+                on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
+                resizing to the original image size for inpainting. This is useful when the masked area is small while
+                the image is large and contain information irrelevant for inpainting, such as background.
             strength (`float`, *optional*, defaults to 0.9999):
                 Conceptually, indicates how much to transform the masked portion of the reference `image`. Must be
                 between 0 and 1. `image` will be used as a starting point, adding more noise to it the larger the
@@ -1238,23 +1271,23 @@ class StableDiffusionXLControlNetInpaintPipeline(
             negative_prompt_2 (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
                 `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
-            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
+            pooled_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
                 If not provided, pooled text embeddings will be generated from `prompt` input argument.
-            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
                 input argument.
@@ -1266,7 +1299,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
             generator (`torch.Generator`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
@@ -1305,15 +1338,15 @@ class StableDiffusionXLControlNetInpaintPipeline(
             clip_skip (`int`, *optional*):
                 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                 the output of the pre-final layer will be used for computing the prompt embeddings.
-            callback_on_step_end (`Callable`, *optional*):
-                A function that calls at the end of each denoising steps during the inference. The function is called
-                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
-                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
-                `callback_on_step_end_tensor_inputs`.
+            callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
+                A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
+                each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
+                DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
+                list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.

         Examples:

@@ -1339,6 +1372,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
                 "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
             )

+        if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
+            callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
+
         controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet

         # align format for control guidance
@@ -1601,10 +1637,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                 1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
                 for s, e in zip(control_guidance_start, control_guidance_end)
             ]
-            if isinstance(controlnet, MultiControlNetModel):
-                controlnet_keep.append(keeps)
-            else:
-                controlnet_keep.append(keeps[0])
+            controlnet_keep.append(keeps if isinstance(controlnet, MultiControlNetModel) else keeps[0])

         # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         height, width = latents.shape[-2:]
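
The refactor above is behavior-preserving: `keeps` holds one scale factor per ControlNet and per step, 1.0 inside the `[control_guidance_start, control_guidance_end)` window and 0.0 outside, and only the multi-ControlNet case stores the whole list. A self-contained illustration of the values being built:

```python
timesteps = list(range(10))
control_guidance_start, control_guidance_end = [0.0], [0.5]

controlnet_keep = []
for i in range(len(timesteps)):
    keeps = [
        1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
        for s, e in zip(control_guidance_start, control_guidance_end)
    ]
    controlnet_keep.append(keeps[0])  # single-ControlNet branch of the new one-liner

print(controlnet_keep)  # [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
```
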
@@ -1721,7 +1754,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                 down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples]
                 mid_block_res_sample = torch.cat([torch.zeros_like(mid_block_res_sample), mid_block_res_sample])

-            if ip_adapter_image is not None:
+            if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
                 added_cond_kwargs["image_embeds"] = image_embeds

             if num_channels_unet == 9:
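
This last fix makes embeds-only IP-Adapter calls work: previously `added_cond_kwargs["image_embeds"]` was set only when a raw `ip_adapter_image` was supplied, so passing precomputed `ip_adapter_image_embeds` alone silently skipped the adapter inputs. A hedged usage sketch; `prepare_ip_adapter_image_embeds` and its signature are assumptions based on the 0.28.0 pipelines, not shown in the hunks here:

```python
# image_embeds = pipe.prepare_ip_adapter_image_embeds(
#     ip_adapter_image=adapter_image,
#     ip_adapter_image_embeds=None,
#     device=pipe.device,
#     num_images_per_prompt=1,
#     do_classifier_free_guidance=True,
# )
# out = pipe(prompt, image=init, mask_image=mask, control_image=cond,
#            ip_adapter_image_embeds=image_embeds)  # now hits the fixed branch
```
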
|