diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -166,8 +166,8 @@ class StableUnCLIPImg2ImgPipeline(
|
|
166
166
|
num_images_per_prompt,
|
167
167
|
do_classifier_free_guidance,
|
168
168
|
negative_prompt=None,
|
169
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
170
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
169
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
170
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
171
171
|
lora_scale: Optional[float] = None,
|
172
172
|
**kwargs,
|
173
173
|
):
|
@@ -254,8 +254,8 @@ class StableUnCLIPImg2ImgPipeline(
|
|
254
254
|
num_images_per_prompt,
|
255
255
|
do_classifier_free_guidance,
|
256
256
|
negative_prompt=None,
|
257
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
258
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
257
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
258
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
259
259
|
lora_scale: Optional[float] = None,
|
260
260
|
clip_skip: Optional[int] = None,
|
261
261
|
):
|
@@ -275,10 +275,10 @@ class StableUnCLIPImg2ImgPipeline(
|
|
275
275
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
276
276
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
277
277
|
less than `1`).
|
278
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
278
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
279
279
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
280
280
|
provided, text embeddings will be generated from `prompt` input argument.
|
281
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
281
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
282
282
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
283
283
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
284
284
|
argument.
|
@@ -537,13 +537,18 @@ class StableUnCLIPImg2ImgPipeline(
|
|
537
537
|
and not isinstance(image, list)
|
538
538
|
):
|
539
539
|
raise ValueError(
|
540
|
-
"`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
|
540
|
+
"`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
|
541
541
|
f" {type(image)}"
|
542
542
|
)
|
543
543
|
|
544
544
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
545
545
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
546
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
546
|
+
shape = (
|
547
|
+
batch_size,
|
548
|
+
num_channels_latents,
|
549
|
+
int(height) // self.vae_scale_factor,
|
550
|
+
int(width) // self.vae_scale_factor,
|
551
|
+
)
|
547
552
|
if isinstance(generator, list) and len(generator) != batch_size:
|
548
553
|
raise ValueError(
|
549
554
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -564,7 +569,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
564
569
|
self,
|
565
570
|
image_embeds: torch.Tensor,
|
566
571
|
noise_level: int,
|
567
|
-
noise: Optional[torch.FloatTensor] = None,
|
572
|
+
noise: Optional[torch.Tensor] = None,
|
568
573
|
generator: Optional[torch.Generator] = None,
|
569
574
|
):
|
570
575
|
"""
|
@@ -610,7 +615,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
610
615
|
@replace_example_docstring(EXAMPLE_DOC_STRING)
|
611
616
|
def __call__(
|
612
617
|
self,
|
613
|
-
image: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
618
|
+
image: Union[torch.Tensor, PIL.Image.Image] = None,
|
614
619
|
prompt: Union[str, List[str]] = None,
|
615
620
|
height: Optional[int] = None,
|
616
621
|
width: Optional[int] = None,
|
@@ -620,16 +625,16 @@ class StableUnCLIPImg2ImgPipeline(
|
|
620
625
|
num_images_per_prompt: Optional[int] = 1,
|
621
626
|
eta: float = 0.0,
|
622
627
|
generator: Optional[torch.Generator] = None,
|
623
|
-
latents: Optional[torch.FloatTensor] = None,
|
624
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
625
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
628
|
+
latents: Optional[torch.Tensor] = None,
|
629
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
630
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
626
631
|
output_type: Optional[str] = "pil",
|
627
632
|
return_dict: bool = True,
|
628
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
633
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
629
634
|
callback_steps: int = 1,
|
630
635
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
631
636
|
noise_level: int = 0,
|
632
|
-
image_embeds: Optional[torch.FloatTensor] = None,
|
637
|
+
image_embeds: Optional[torch.Tensor] = None,
|
633
638
|
clip_skip: Optional[int] = None,
|
634
639
|
):
|
635
640
|
r"""
|
@@ -639,7 +644,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
639
644
|
prompt (`str` or `List[str]`, *optional*):
|
640
645
|
The prompt or prompts to guide the image generation. If not defined, either `prompt_embeds` will be
|
641
646
|
used or prompt is initialized to `""`.
|
642
|
-
image (`torch.FloatTensor` or `PIL.Image.Image`):
|
647
|
+
image (`torch.Tensor` or `PIL.Image.Image`):
|
643
648
|
`Image` or tensor representing an image batch. The image is encoded to its CLIP embedding which the
|
644
649
|
`unet` is conditioned on. The image is _not_ encoded by the `vae` and then used as the latents in the
|
645
650
|
denoising process like it is in the standard Stable Diffusion text-guided image variation process.
|
@@ -664,14 +669,14 @@ class StableUnCLIPImg2ImgPipeline(
|
|
664
669
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
665
670
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
666
671
|
generation deterministic.
|
667
|
-
latents (`torch.FloatTensor`, *optional*):
|
672
|
+
latents (`torch.Tensor`, *optional*):
|
668
673
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
669
674
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
670
675
|
tensor is generated by sampling using the supplied random `generator`.
|
671
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
676
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
672
677
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
673
678
|
provided, text embeddings are generated from the `prompt` input argument.
|
674
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
679
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
675
680
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
676
681
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
677
682
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -680,7 +685,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
680
685
|
Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
|
681
686
|
callback (`Callable`, *optional*):
|
682
687
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
683
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
688
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
684
689
|
callback_steps (`int`, *optional*, defaults to 1):
|
685
690
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
686
691
|
every step.
|
@@ -690,7 +695,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
690
695
|
noise_level (`int`, *optional*, defaults to `0`):
|
691
696
|
The amount of noise to add to the image embeddings. A higher `noise_level` increases the variance in
|
692
697
|
the final un-noised images. See [`StableUnCLIPPipeline.noise_image_embeddings`] for more details.
|
693
|
-
image_embeds (`torch.FloatTensor`, *optional*):
|
698
|
+
image_embeds (`torch.Tensor`, *optional*):
|
694
699
|
Pre-generated CLIP embeddings to condition the `unet` on. These latents are not used in the denoising
|
695
700
|
process. If you want to provide pre-generated latents, pass them to `__call__` as `latents`.
|
696
701
|
clip_skip (`int`, *optional*):
|
@@ -781,16 +786,17 @@ class StableUnCLIPImg2ImgPipeline(
|
|
781
786
|
|
782
787
|
# 6. Prepare latent variables
|
783
788
|
num_channels_latents = self.unet.config.in_channels
|
784
|
-
latents = self.prepare_latents(
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
789
|
+
if latents is None:
|
790
|
+
latents = self.prepare_latents(
|
791
|
+
batch_size=batch_size,
|
792
|
+
num_channels_latents=num_channels_latents,
|
793
|
+
height=height,
|
794
|
+
width=width,
|
795
|
+
dtype=prompt_embeds.dtype,
|
796
|
+
device=device,
|
797
|
+
generator=generator,
|
798
|
+
latents=latents,
|
799
|
+
)
|
794
800
|
|
795
801
|
# 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|
796
802
|
extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
|
@@ -31,6 +31,7 @@ def cosine_distance(image_embeds, text_embeds):
|
|
31
31
|
|
32
32
|
class StableDiffusionSafetyChecker(PreTrainedModel):
|
33
33
|
config_class = CLIPConfig
|
34
|
+
main_input_name = "clip_input"
|
34
35
|
|
35
36
|
_no_split_modules = ["CLIPEncoderLayer"]
|
36
37
|
|
@@ -99,7 +100,7 @@ class StableDiffusionSafetyChecker(PreTrainedModel):
|
|
99
100
|
return images, has_nsfw_concepts
|
100
101
|
|
101
102
|
@torch.no_grad()
|
102
|
-
def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
|
103
|
+
def forward_onnx(self, clip_input: torch.Tensor, images: torch.Tensor):
|
103
104
|
pooled_output = self.vision_model(clip_input)[1] # pooled_output
|
104
105
|
image_embeds = self.visual_projection(pooled_output)
|
105
106
|
|
@@ -254,8 +254,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
254
254
|
num_images_per_prompt,
|
255
255
|
do_classifier_free_guidance,
|
256
256
|
negative_prompt=None,
|
257
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
258
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
257
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
258
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
259
259
|
lora_scale: Optional[float] = None,
|
260
260
|
**kwargs,
|
261
261
|
):
|
@@ -287,8 +287,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
287
287
|
num_images_per_prompt,
|
288
288
|
do_classifier_free_guidance,
|
289
289
|
negative_prompt=None,
|
290
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
291
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
290
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
291
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
292
292
|
lora_scale: Optional[float] = None,
|
293
293
|
clip_skip: Optional[int] = None,
|
294
294
|
):
|
@@ -308,10 +308,10 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
308
308
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
309
309
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
310
310
|
less than `1`).
|
311
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
311
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
312
312
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
313
313
|
provided, text embeddings will be generated from `prompt` input argument.
|
314
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
314
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
315
315
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
316
316
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
317
317
|
argument.
|
@@ -581,7 +581,12 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
581
581
|
|
582
582
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
583
583
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
584
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
584
|
+
shape = (
|
585
|
+
batch_size,
|
586
|
+
num_channels_latents,
|
587
|
+
int(height) // self.vae_scale_factor,
|
588
|
+
int(width) // self.vae_scale_factor,
|
589
|
+
)
|
585
590
|
if isinstance(generator, list) and len(generator) != batch_size:
|
586
591
|
raise ValueError(
|
587
592
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -741,12 +746,12 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
741
746
|
num_images_per_prompt: int = 1,
|
742
747
|
eta: float = 0.0,
|
743
748
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
744
|
-
latents: Optional[torch.FloatTensor] = None,
|
745
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
746
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
749
|
+
latents: Optional[torch.Tensor] = None,
|
750
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
751
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
747
752
|
output_type: Optional[str] = "pil",
|
748
753
|
return_dict: bool = True,
|
749
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
754
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
750
755
|
callback_steps: int = 1,
|
751
756
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
752
757
|
max_iter_to_alter: int = 25,
|
@@ -784,14 +789,14 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
784
789
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
785
790
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
786
791
|
generation deterministic.
|
787
|
-
latents (`torch.FloatTensor`, *optional*):
|
792
|
+
latents (`torch.Tensor`, *optional*):
|
788
793
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
789
794
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
790
795
|
tensor is generated by sampling using the supplied random `generator`.
|
791
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
796
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
792
797
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
793
798
|
provided, text embeddings are generated from the `prompt` input argument.
|
794
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
799
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
795
800
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
796
801
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
797
802
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -801,7 +806,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
801
806
|
plain tuple.
|
802
807
|
callback (`Callable`, *optional*):
|
803
808
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
804
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
809
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
805
810
|
callback_steps (`int`, *optional*, defaults to 1):
|
806
811
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
807
812
|
every step.
|
@@ -902,6 +907,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
902
907
|
if attn_res is None:
|
903
908
|
attn_res = int(np.ceil(width / 32)), int(np.ceil(height / 32))
|
904
909
|
self.attention_store = AttentionStore(attn_res)
|
910
|
+
original_attn_proc = self.unet.attn_processors
|
905
911
|
self.register_attention_control()
|
906
912
|
|
907
913
|
# default config for step size from original repo
|
@@ -1016,6 +1022,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
1016
1022
|
|
1017
1023
|
image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
|
1018
1024
|
self.maybe_free_model_hooks()
|
1025
|
+
# make sure to set the original attention processors back
|
1026
|
+
self.unet.set_attn_processor(original_attn_proc)
|
1019
1027
|
|
1020
1028
|
if not return_dict:
|
1021
1029
|
return (image, has_nsfw_concept)
|
@@ -53,7 +53,7 @@ class DiffEditInversionPipelineOutput(BaseOutput):
|
|
53
53
|
Output class for Stable Diffusion pipelines.
|
54
54
|
|
55
55
|
Args:
|
56
|
-
latents (`torch.FloatTensor`)
|
56
|
+
latents (`torch.Tensor`)
|
57
57
|
inverted latents tensor
|
58
58
|
images (`List[PIL.Image.Image]` or `np.ndarray`)
|
59
59
|
List of denoised PIL images of length `num_timesteps * batch_size` or numpy array of shape `(num_timesteps,
|
@@ -61,7 +61,7 @@ class DiffEditInversionPipelineOutput(BaseOutput):
|
|
61
61
|
diffusion pipeline.
|
62
62
|
"""
|
63
63
|
|
64
|
-
latents: torch.FloatTensor
|
64
|
+
latents: torch.Tensor
|
65
65
|
images: Union[List[PIL.Image.Image], np.ndarray]
|
66
66
|
|
67
67
|
|
@@ -185,7 +185,7 @@ def preprocess(image):
|
|
185
185
|
def preprocess_mask(mask, batch_size: int = 1):
|
186
186
|
if not isinstance(mask, torch.Tensor):
|
187
187
|
# preprocess mask
|
188
|
-
if isinstance(mask, PIL.Image.Image) or isinstance(mask, np.ndarray):
|
188
|
+
if isinstance(mask, (PIL.Image.Image, np.ndarray)):
|
189
189
|
mask = [mask]
|
190
190
|
|
191
191
|
if isinstance(mask, list):
|
@@ -381,8 +381,8 @@ class StableDiffusionDiffEditPipeline(
|
|
381
381
|
num_images_per_prompt,
|
382
382
|
do_classifier_free_guidance,
|
383
383
|
negative_prompt=None,
|
384
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
385
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
384
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
385
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
386
386
|
lora_scale: Optional[float] = None,
|
387
387
|
**kwargs,
|
388
388
|
):
|
@@ -414,8 +414,8 @@ class StableDiffusionDiffEditPipeline(
|
|
414
414
|
num_images_per_prompt,
|
415
415
|
do_classifier_free_guidance,
|
416
416
|
negative_prompt=None,
|
417
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
418
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
417
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
418
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
419
419
|
lora_scale: Optional[float] = None,
|
420
420
|
clip_skip: Optional[int] = None,
|
421
421
|
):
|
@@ -435,10 +435,10 @@ class StableDiffusionDiffEditPipeline(
|
|
435
435
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
436
436
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
437
437
|
less than `1`).
|
438
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
438
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
439
439
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
440
440
|
provided, text embeddings will be generated from `prompt` input argument.
|
441
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
441
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
442
442
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
443
443
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
444
444
|
argument.
|
@@ -716,15 +716,12 @@ class StableDiffusionDiffEditPipeline(
|
|
716
716
|
f" `source_negative_prompt_embeds` {source_negative_prompt_embeds.shape}."
|
717
717
|
)
|
718
718
|
|
719
|
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
|
720
719
|
def get_timesteps(self, num_inference_steps, strength, device):
|
721
720
|
# get the original timestep using init_timestep
|
722
721
|
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
723
722
|
|
724
723
|
t_start = max(num_inference_steps - init_timestep, 0)
|
725
724
|
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
726
|
-
if hasattr(self.scheduler, "set_begin_index"):
|
727
|
-
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
728
725
|
|
729
726
|
return timesteps, num_inference_steps - t_start
|
730
727
|
|
@@ -743,7 +740,12 @@ class StableDiffusionDiffEditPipeline(
|
|
743
740
|
|
744
741
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
745
742
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
746
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
743
|
+
shape = (
|
744
|
+
batch_size,
|
745
|
+
num_channels_latents,
|
746
|
+
int(height) // self.vae_scale_factor,
|
747
|
+
int(width) // self.vae_scale_factor,
|
748
|
+
)
|
747
749
|
if isinstance(generator, list) and len(generator) != batch_size:
|
748
750
|
raise ValueError(
|
749
751
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -829,15 +831,15 @@ class StableDiffusionDiffEditPipeline(
|
|
829
831
|
@replace_example_docstring(EXAMPLE_DOC_STRING)
|
830
832
|
def generate_mask(
|
831
833
|
self,
|
832
|
-
image: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
834
|
+
image: Union[torch.Tensor, PIL.Image.Image] = None,
|
833
835
|
target_prompt: Optional[Union[str, List[str]]] = None,
|
834
836
|
target_negative_prompt: Optional[Union[str, List[str]]] = None,
|
835
|
-
target_prompt_embeds: Optional[torch.FloatTensor] = None,
|
836
|
-
target_negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
837
|
+
target_prompt_embeds: Optional[torch.Tensor] = None,
|
838
|
+
target_negative_prompt_embeds: Optional[torch.Tensor] = None,
|
837
839
|
source_prompt: Optional[Union[str, List[str]]] = None,
|
838
840
|
source_negative_prompt: Optional[Union[str, List[str]]] = None,
|
839
|
-
source_prompt_embeds: Optional[torch.FloatTensor] = None,
|
840
|
-
source_negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
841
|
+
source_prompt_embeds: Optional[torch.Tensor] = None,
|
842
|
+
source_negative_prompt_embeds: Optional[torch.Tensor] = None,
|
841
843
|
num_maps_per_mask: Optional[int] = 10,
|
842
844
|
mask_encode_strength: Optional[float] = 0.5,
|
843
845
|
mask_thresholding_ratio: Optional[float] = 3.0,
|
@@ -859,10 +861,10 @@ class StableDiffusionDiffEditPipeline(
|
|
859
861
|
target_negative_prompt (`str` or `List[str]`, *optional*):
|
860
862
|
The prompt or prompts to guide what to not include in image generation. If not defined, you need to
|
861
863
|
pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
|
862
|
-
target_prompt_embeds (`torch.FloatTensor`, *optional*):
|
864
|
+
target_prompt_embeds (`torch.Tensor`, *optional*):
|
863
865
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
864
866
|
provided, text embeddings are generated from the `prompt` input argument.
|
865
|
-
target_negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
867
|
+
target_negative_prompt_embeds (`torch.Tensor`, *optional*):
|
866
868
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
867
869
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
868
870
|
source_prompt (`str` or `List[str]`, *optional*):
|
@@ -871,11 +873,11 @@ class StableDiffusionDiffEditPipeline(
|
|
871
873
|
source_negative_prompt (`str` or `List[str]`, *optional*):
|
872
874
|
The prompt or prompts to guide semantic mask generation away from using DiffEdit. If not defined, you
|
873
875
|
need to pass `source_negative_prompt_embeds` or `source_image` instead.
|
874
|
-
source_prompt_embeds (`torch.FloatTensor`, *optional*):
|
876
|
+
source_prompt_embeds (`torch.Tensor`, *optional*):
|
875
877
|
Pre-generated text embeddings to guide the semantic mask generation. Can be used to easily tweak text
|
876
878
|
inputs (prompt weighting). If not provided, text embeddings are generated from `source_prompt` input
|
877
879
|
argument.
|
878
|
-
source_negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
880
|
+
source_negative_prompt_embeds (`torch.Tensor`, *optional*):
|
879
881
|
Pre-generated text embeddings to negatively guide the semantic mask generation. Can be used to easily
|
880
882
|
tweak text inputs (prompt weighting). If not provided, text embeddings are generated from
|
881
883
|
`source_negative_prompt` input argument.
|
@@ -1049,18 +1051,18 @@ class StableDiffusionDiffEditPipeline(
|
|
1049
1051
|
def invert(
|
1050
1052
|
self,
|
1051
1053
|
prompt: Optional[Union[str, List[str]]] = None,
|
1052
|
-
image: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
1054
|
+
image: Union[torch.Tensor, PIL.Image.Image] = None,
|
1053
1055
|
num_inference_steps: int = 50,
|
1054
1056
|
inpaint_strength: float = 0.8,
|
1055
1057
|
guidance_scale: float = 7.5,
|
1056
1058
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
1057
1059
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
1058
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
1059
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
1060
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
1061
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
1060
1062
|
decode_latents: bool = False,
|
1061
1063
|
output_type: Optional[str] = "pil",
|
1062
1064
|
return_dict: bool = True,
|
1063
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
1065
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
1064
1066
|
callback_steps: Optional[int] = 1,
|
1065
1067
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
1066
1068
|
lambda_auto_corr: float = 20.0,
|
@@ -1093,10 +1095,10 @@ class StableDiffusionDiffEditPipeline(
|
|
1093
1095
|
generator (`torch.Generator`, *optional*):
|
1094
1096
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
1095
1097
|
generation deterministic.
|
1096
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
1098
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
1097
1099
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
1098
1100
|
provided, text embeddings are generated from the `prompt` input argument.
|
1099
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
1101
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
1100
1102
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
1101
1103
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
1102
1104
|
decode_latents (`bool`, *optional*, defaults to `False`):
|
@@ -1109,7 +1111,7 @@ class StableDiffusionDiffEditPipeline(
|
|
1109
1111
|
plain tuple.
|
1110
1112
|
callback (`Callable`, *optional*):
|
1111
1113
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
1112
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
1114
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
1113
1115
|
callback_steps (`int`, *optional*, defaults to 1):
|
1114
1116
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
1115
1117
|
every step.
|
@@ -1287,8 +1289,8 @@ class StableDiffusionDiffEditPipeline(
|
|
1287
1289
|
def __call__(
|
1288
1290
|
self,
|
1289
1291
|
prompt: Optional[Union[str, List[str]]] = None,
|
1290
|
-
mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
1291
|
-
image_latents: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
1292
|
+
mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
|
1293
|
+
image_latents: Union[torch.Tensor, PIL.Image.Image] = None,
|
1292
1294
|
inpaint_strength: Optional[float] = 0.8,
|
1293
1295
|
num_inference_steps: int = 50,
|
1294
1296
|
guidance_scale: float = 7.5,
|
@@ -1296,15 +1298,15 @@ class StableDiffusionDiffEditPipeline(
|
|
1296
1298
|
num_images_per_prompt: Optional[int] = 1,
|
1297
1299
|
eta: float = 0.0,
|
1298
1300
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
1299
|
-
latents: Optional[torch.FloatTensor] = None,
|
1300
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
1301
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
1301
|
+
latents: Optional[torch.Tensor] = None,
|
1302
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
1303
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
1302
1304
|
output_type: Optional[str] = "pil",
|
1303
1305
|
return_dict: bool = True,
|
1304
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
1306
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
1305
1307
|
callback_steps: int = 1,
|
1306
1308
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
1307
|
-
clip_ckip: int = None,
|
1309
|
+
clip_skip: int = None,
|
1308
1310
|
):
|
1309
1311
|
r"""
|
1310
1312
|
The call function to the pipeline for generation.
|
@@ -1317,7 +1319,7 @@ class StableDiffusionDiffEditPipeline(
|
|
1317
1319
|
repainted, while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
|
1318
1320
|
single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
|
1319
1321
|
instead of 3, so the expected shape would be `(B, 1, H, W)`.
|
1320
|
-
image_latents (`PIL.Image.Image` or `torch.FloatTensor`):
|
1322
|
+
image_latents (`PIL.Image.Image` or `torch.Tensor`):
|
1321
1323
|
Partially noised image latents from the inversion process to be used as inputs for image generation.
|
1322
1324
|
inpaint_strength (`float`, *optional*, defaults to 0.8):
|
1323
1325
|
Indicates extent to inpaint the masked area. Must be between 0 and 1. When `inpaint_strength` is 1, the
|
@@ -1341,14 +1343,14 @@ class StableDiffusionDiffEditPipeline(
|
|
1341
1343
|
generator (`torch.Generator`, *optional*):
|
1342
1344
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
1343
1345
|
generation deterministic.
|
1344
|
-
latents (`torch.FloatTensor`, *optional*):
|
1346
|
+
latents (`torch.Tensor`, *optional*):
|
1345
1347
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
1346
1348
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
1347
1349
|
tensor is generated by sampling using the supplied random `generator`.
|
1348
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
1350
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
1349
1351
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
1350
1352
|
provided, text embeddings are generated from the `prompt` input argument.
|
1351
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
1353
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
1352
1354
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
1353
1355
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
1354
1356
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -1358,7 +1360,7 @@ class StableDiffusionDiffEditPipeline(
|
|
1358
1360
|
plain tuple.
|
1359
1361
|
callback (`Callable`, *optional*):
|
1360
1362
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
1361
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
1363
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
1362
1364
|
callback_steps (`int`, *optional*, defaults to 1):
|
1363
1365
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
1364
1366
|
every step.
|
@@ -1426,7 +1428,7 @@ class StableDiffusionDiffEditPipeline(
|
|
1426
1428
|
prompt_embeds=prompt_embeds,
|
1427
1429
|
negative_prompt_embeds=negative_prompt_embeds,
|
1428
1430
|
lora_scale=text_encoder_lora_scale,
|
1429
|
-
clip_skip=clip_ckip,
|
1431
|
+
clip_skip=clip_skip,
|
1430
1432
|
)
|
1431
1433
|
# For classifier free guidance, we need to do two forward passes.
|
1432
1434
|
# Here we concatenate the unconditional and text embeddings into a single batch
|