diffusers 0.27.2__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +19 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +20 -26
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +42 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
- diffusers/schedulers/scheduling_edm_euler.py +50 -31
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
- diffusers/schedulers/scheduling_euler_discrete.py +160 -68
- diffusers/schedulers/scheduling_heun_discrete.py +57 -39
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +24 -26
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/METADATA +47 -47
- diffusers-0.28.0.dist-info/RECORD +414 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/WHEEL +1 -1
- diffusers-0.27.2.dist-info/RECORD +0 -399
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -166,8 +166,8 @@ class StableUnCLIPImg2ImgPipeline(
|
|
166
166
|
num_images_per_prompt,
|
167
167
|
do_classifier_free_guidance,
|
168
168
|
negative_prompt=None,
|
169
|
-
prompt_embeds: Optional[torch.
|
170
|
-
negative_prompt_embeds: Optional[torch.
|
169
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
170
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
171
171
|
lora_scale: Optional[float] = None,
|
172
172
|
**kwargs,
|
173
173
|
):
|
@@ -254,8 +254,8 @@ class StableUnCLIPImg2ImgPipeline(
|
|
254
254
|
num_images_per_prompt,
|
255
255
|
do_classifier_free_guidance,
|
256
256
|
negative_prompt=None,
|
257
|
-
prompt_embeds: Optional[torch.
|
258
|
-
negative_prompt_embeds: Optional[torch.
|
257
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
258
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
259
259
|
lora_scale: Optional[float] = None,
|
260
260
|
clip_skip: Optional[int] = None,
|
261
261
|
):
|
@@ -275,10 +275,10 @@ class StableUnCLIPImg2ImgPipeline(
|
|
275
275
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
276
276
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
277
277
|
less than `1`).
|
278
|
-
prompt_embeds (`torch.
|
278
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
279
279
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
280
280
|
provided, text embeddings will be generated from `prompt` input argument.
|
281
|
-
negative_prompt_embeds (`torch.
|
281
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
282
282
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
283
283
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
284
284
|
argument.
|
@@ -537,13 +537,18 @@ class StableUnCLIPImg2ImgPipeline(
|
|
537
537
|
and not isinstance(image, list)
|
538
538
|
):
|
539
539
|
raise ValueError(
|
540
|
-
"`image` has to be of type `torch.
|
540
|
+
"`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
|
541
541
|
f" {type(image)}"
|
542
542
|
)
|
543
543
|
|
544
544
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
545
545
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
546
|
-
shape = (
|
546
|
+
shape = (
|
547
|
+
batch_size,
|
548
|
+
num_channels_latents,
|
549
|
+
int(height) // self.vae_scale_factor,
|
550
|
+
int(width) // self.vae_scale_factor,
|
551
|
+
)
|
547
552
|
if isinstance(generator, list) and len(generator) != batch_size:
|
548
553
|
raise ValueError(
|
549
554
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -564,7 +569,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
564
569
|
self,
|
565
570
|
image_embeds: torch.Tensor,
|
566
571
|
noise_level: int,
|
567
|
-
noise: Optional[torch.
|
572
|
+
noise: Optional[torch.Tensor] = None,
|
568
573
|
generator: Optional[torch.Generator] = None,
|
569
574
|
):
|
570
575
|
"""
|
@@ -610,7 +615,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
610
615
|
@replace_example_docstring(EXAMPLE_DOC_STRING)
|
611
616
|
def __call__(
|
612
617
|
self,
|
613
|
-
image: Union[torch.
|
618
|
+
image: Union[torch.Tensor, PIL.Image.Image] = None,
|
614
619
|
prompt: Union[str, List[str]] = None,
|
615
620
|
height: Optional[int] = None,
|
616
621
|
width: Optional[int] = None,
|
@@ -620,16 +625,16 @@ class StableUnCLIPImg2ImgPipeline(
|
|
620
625
|
num_images_per_prompt: Optional[int] = 1,
|
621
626
|
eta: float = 0.0,
|
622
627
|
generator: Optional[torch.Generator] = None,
|
623
|
-
latents: Optional[torch.
|
624
|
-
prompt_embeds: Optional[torch.
|
625
|
-
negative_prompt_embeds: Optional[torch.
|
628
|
+
latents: Optional[torch.Tensor] = None,
|
629
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
630
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
626
631
|
output_type: Optional[str] = "pil",
|
627
632
|
return_dict: bool = True,
|
628
|
-
callback: Optional[Callable[[int, int, torch.
|
633
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
629
634
|
callback_steps: int = 1,
|
630
635
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
631
636
|
noise_level: int = 0,
|
632
|
-
image_embeds: Optional[torch.
|
637
|
+
image_embeds: Optional[torch.Tensor] = None,
|
633
638
|
clip_skip: Optional[int] = None,
|
634
639
|
):
|
635
640
|
r"""
|
@@ -639,7 +644,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
639
644
|
prompt (`str` or `List[str]`, *optional*):
|
640
645
|
The prompt or prompts to guide the image generation. If not defined, either `prompt_embeds` will be
|
641
646
|
used or prompt is initialized to `""`.
|
642
|
-
image (`torch.
|
647
|
+
image (`torch.Tensor` or `PIL.Image.Image`):
|
643
648
|
`Image` or tensor representing an image batch. The image is encoded to its CLIP embedding which the
|
644
649
|
`unet` is conditioned on. The image is _not_ encoded by the `vae` and then used as the latents in the
|
645
650
|
denoising process like it is in the standard Stable Diffusion text-guided image variation process.
|
@@ -664,14 +669,14 @@ class StableUnCLIPImg2ImgPipeline(
|
|
664
669
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
665
670
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
666
671
|
generation deterministic.
|
667
|
-
latents (`torch.
|
672
|
+
latents (`torch.Tensor`, *optional*):
|
668
673
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
669
674
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
670
675
|
tensor is generated by sampling using the supplied random `generator`.
|
671
|
-
prompt_embeds (`torch.
|
676
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
672
677
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
673
678
|
provided, text embeddings are generated from the `prompt` input argument.
|
674
|
-
negative_prompt_embeds (`torch.
|
679
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
675
680
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
676
681
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
677
682
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -680,7 +685,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
680
685
|
Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
|
681
686
|
callback (`Callable`, *optional*):
|
682
687
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
683
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
688
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
684
689
|
callback_steps (`int`, *optional*, defaults to 1):
|
685
690
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
686
691
|
every step.
|
@@ -690,7 +695,7 @@ class StableUnCLIPImg2ImgPipeline(
|
|
690
695
|
noise_level (`int`, *optional*, defaults to `0`):
|
691
696
|
The amount of noise to add to the image embeddings. A higher `noise_level` increases the variance in
|
692
697
|
the final un-noised images. See [`StableUnCLIPPipeline.noise_image_embeddings`] for more details.
|
693
|
-
image_embeds (`torch.
|
698
|
+
image_embeds (`torch.Tensor`, *optional*):
|
694
699
|
Pre-generated CLIP embeddings to condition the `unet` on. These latents are not used in the denoising
|
695
700
|
process. If you want to provide pre-generated latents, pass them to `__call__` as `latents`.
|
696
701
|
clip_skip (`int`, *optional*):
|
@@ -781,16 +786,17 @@ class StableUnCLIPImg2ImgPipeline(
|
|
781
786
|
|
782
787
|
# 6. Prepare latent variables
|
783
788
|
num_channels_latents = self.unet.config.in_channels
|
784
|
-
latents
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
789
|
+
if latents is None:
|
790
|
+
latents = self.prepare_latents(
|
791
|
+
batch_size=batch_size,
|
792
|
+
num_channels_latents=num_channels_latents,
|
793
|
+
height=height,
|
794
|
+
width=width,
|
795
|
+
dtype=prompt_embeds.dtype,
|
796
|
+
device=device,
|
797
|
+
generator=generator,
|
798
|
+
latents=latents,
|
799
|
+
)
|
794
800
|
|
795
801
|
# 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|
796
802
|
extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
|
@@ -31,6 +31,7 @@ def cosine_distance(image_embeds, text_embeds):
|
|
31
31
|
|
32
32
|
class StableDiffusionSafetyChecker(PreTrainedModel):
|
33
33
|
config_class = CLIPConfig
|
34
|
+
main_input_name = "clip_input"
|
34
35
|
|
35
36
|
_no_split_modules = ["CLIPEncoderLayer"]
|
36
37
|
|
@@ -99,7 +100,7 @@ class StableDiffusionSafetyChecker(PreTrainedModel):
|
|
99
100
|
return images, has_nsfw_concepts
|
100
101
|
|
101
102
|
@torch.no_grad()
|
102
|
-
def forward_onnx(self, clip_input: torch.
|
103
|
+
def forward_onnx(self, clip_input: torch.Tensor, images: torch.Tensor):
|
103
104
|
pooled_output = self.vision_model(clip_input)[1] # pooled_output
|
104
105
|
image_embeds = self.visual_projection(pooled_output)
|
105
106
|
|
@@ -254,8 +254,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
254
254
|
num_images_per_prompt,
|
255
255
|
do_classifier_free_guidance,
|
256
256
|
negative_prompt=None,
|
257
|
-
prompt_embeds: Optional[torch.
|
258
|
-
negative_prompt_embeds: Optional[torch.
|
257
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
258
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
259
259
|
lora_scale: Optional[float] = None,
|
260
260
|
**kwargs,
|
261
261
|
):
|
@@ -287,8 +287,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
287
287
|
num_images_per_prompt,
|
288
288
|
do_classifier_free_guidance,
|
289
289
|
negative_prompt=None,
|
290
|
-
prompt_embeds: Optional[torch.
|
291
|
-
negative_prompt_embeds: Optional[torch.
|
290
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
291
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
292
292
|
lora_scale: Optional[float] = None,
|
293
293
|
clip_skip: Optional[int] = None,
|
294
294
|
):
|
@@ -308,10 +308,10 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
308
308
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
309
309
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
310
310
|
less than `1`).
|
311
|
-
prompt_embeds (`torch.
|
311
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
312
312
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
313
313
|
provided, text embeddings will be generated from `prompt` input argument.
|
314
|
-
negative_prompt_embeds (`torch.
|
314
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
315
315
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
316
316
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
317
317
|
argument.
|
@@ -581,7 +581,12 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
581
581
|
|
582
582
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
583
583
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
584
|
-
shape = (
|
584
|
+
shape = (
|
585
|
+
batch_size,
|
586
|
+
num_channels_latents,
|
587
|
+
int(height) // self.vae_scale_factor,
|
588
|
+
int(width) // self.vae_scale_factor,
|
589
|
+
)
|
585
590
|
if isinstance(generator, list) and len(generator) != batch_size:
|
586
591
|
raise ValueError(
|
587
592
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -741,12 +746,12 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
741
746
|
num_images_per_prompt: int = 1,
|
742
747
|
eta: float = 0.0,
|
743
748
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
744
|
-
latents: Optional[torch.
|
745
|
-
prompt_embeds: Optional[torch.
|
746
|
-
negative_prompt_embeds: Optional[torch.
|
749
|
+
latents: Optional[torch.Tensor] = None,
|
750
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
751
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
747
752
|
output_type: Optional[str] = "pil",
|
748
753
|
return_dict: bool = True,
|
749
|
-
callback: Optional[Callable[[int, int, torch.
|
754
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
750
755
|
callback_steps: int = 1,
|
751
756
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
752
757
|
max_iter_to_alter: int = 25,
|
@@ -784,14 +789,14 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
784
789
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
785
790
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
786
791
|
generation deterministic.
|
787
|
-
latents (`torch.
|
792
|
+
latents (`torch.Tensor`, *optional*):
|
788
793
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
789
794
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
790
795
|
tensor is generated by sampling using the supplied random `generator`.
|
791
|
-
prompt_embeds (`torch.
|
796
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
792
797
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
793
798
|
provided, text embeddings are generated from the `prompt` input argument.
|
794
|
-
negative_prompt_embeds (`torch.
|
799
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
795
800
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
796
801
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
797
802
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -801,7 +806,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
801
806
|
plain tuple.
|
802
807
|
callback (`Callable`, *optional*):
|
803
808
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
804
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
809
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
805
810
|
callback_steps (`int`, *optional*, defaults to 1):
|
806
811
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
807
812
|
every step.
|
@@ -902,6 +907,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
902
907
|
if attn_res is None:
|
903
908
|
attn_res = int(np.ceil(width / 32)), int(np.ceil(height / 32))
|
904
909
|
self.attention_store = AttentionStore(attn_res)
|
910
|
+
original_attn_proc = self.unet.attn_processors
|
905
911
|
self.register_attention_control()
|
906
912
|
|
907
913
|
# default config for step size from original repo
|
@@ -1016,6 +1022,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
|
1016
1022
|
|
1017
1023
|
image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
|
1018
1024
|
self.maybe_free_model_hooks()
|
1025
|
+
# make sure to set the original attention processors back
|
1026
|
+
self.unet.set_attn_processor(original_attn_proc)
|
1019
1027
|
|
1020
1028
|
if not return_dict:
|
1021
1029
|
return (image, has_nsfw_concept)
|
@@ -53,7 +53,7 @@ class DiffEditInversionPipelineOutput(BaseOutput):
|
|
53
53
|
Output class for Stable Diffusion pipelines.
|
54
54
|
|
55
55
|
Args:
|
56
|
-
latents (`torch.FloatTensor`)
|
56
|
+
latents (`torch.Tensor`)
|
57
57
|
inverted latents tensor
|
58
58
|
images (`List[PIL.Image.Image]` or `np.ndarray`)
|
59
59
|
List of denoised PIL images of length `num_timesteps * batch_size` or numpy array of shape `(num_timesteps,
|
@@ -61,7 +61,7 @@ class DiffEditInversionPipelineOutput(BaseOutput):
|
|
61
61
|
diffusion pipeline.
|
62
62
|
"""
|
63
63
|
|
64
|
-
latents: torch.FloatTensor
|
64
|
+
latents: torch.Tensor
|
65
65
|
images: Union[List[PIL.Image.Image], np.ndarray]
|
66
66
|
|
67
67
|
|
@@ -185,7 +185,7 @@ def preprocess(image):
|
|
185
185
|
def preprocess_mask(mask, batch_size: int = 1):
|
186
186
|
if not isinstance(mask, torch.Tensor):
|
187
187
|
# preprocess mask
|
188
|
-
if isinstance(mask, PIL.Image.Image
|
188
|
+
if isinstance(mask, (PIL.Image.Image, np.ndarray)):
|
189
189
|
mask = [mask]
|
190
190
|
|
191
191
|
if isinstance(mask, list):
|
@@ -381,8 +381,8 @@ class StableDiffusionDiffEditPipeline(
|
|
381
381
|
num_images_per_prompt,
|
382
382
|
do_classifier_free_guidance,
|
383
383
|
negative_prompt=None,
|
384
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
385
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
384
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
385
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
386
386
|
lora_scale: Optional[float] = None,
|
387
387
|
**kwargs,
|
388
388
|
):
|
@@ -414,8 +414,8 @@ class StableDiffusionDiffEditPipeline(
|
|
414
414
|
num_images_per_prompt,
|
415
415
|
do_classifier_free_guidance,
|
416
416
|
negative_prompt=None,
|
417
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
418
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
417
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
418
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
419
419
|
lora_scale: Optional[float] = None,
|
420
420
|
clip_skip: Optional[int] = None,
|
421
421
|
):
|
@@ -435,10 +435,10 @@ class StableDiffusionDiffEditPipeline(
|
|
435
435
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
436
436
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
437
437
|
less than `1`).
|
438
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
438
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
439
439
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
440
440
|
provided, text embeddings will be generated from `prompt` input argument.
|
441
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
441
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
442
442
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
443
443
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
444
444
|
argument.
|
@@ -740,7 +740,12 @@ class StableDiffusionDiffEditPipeline(
|
|
740
740
|
|
741
741
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
742
742
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
743
|
-
shape = (
|
743
|
+
shape = (
|
744
|
+
batch_size,
|
745
|
+
num_channels_latents,
|
746
|
+
int(height) // self.vae_scale_factor,
|
747
|
+
int(width) // self.vae_scale_factor,
|
748
|
+
)
|
744
749
|
if isinstance(generator, list) and len(generator) != batch_size:
|
745
750
|
raise ValueError(
|
746
751
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -826,15 +831,15 @@ class StableDiffusionDiffEditPipeline(
|
|
826
831
|
@replace_example_docstring(EXAMPLE_DOC_STRING)
|
827
832
|
def generate_mask(
|
828
833
|
self,
|
829
|
-
image: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
834
|
+
image: Union[torch.Tensor, PIL.Image.Image] = None,
|
830
835
|
target_prompt: Optional[Union[str, List[str]]] = None,
|
831
836
|
target_negative_prompt: Optional[Union[str, List[str]]] = None,
|
832
|
-
target_prompt_embeds: Optional[torch.FloatTensor] = None,
|
833
|
-
target_negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
837
|
+
target_prompt_embeds: Optional[torch.Tensor] = None,
|
838
|
+
target_negative_prompt_embeds: Optional[torch.Tensor] = None,
|
834
839
|
source_prompt: Optional[Union[str, List[str]]] = None,
|
835
840
|
source_negative_prompt: Optional[Union[str, List[str]]] = None,
|
836
|
-
source_prompt_embeds: Optional[torch.FloatTensor] = None,
|
837
|
-
source_negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
841
|
+
source_prompt_embeds: Optional[torch.Tensor] = None,
|
842
|
+
source_negative_prompt_embeds: Optional[torch.Tensor] = None,
|
838
843
|
num_maps_per_mask: Optional[int] = 10,
|
839
844
|
mask_encode_strength: Optional[float] = 0.5,
|
840
845
|
mask_thresholding_ratio: Optional[float] = 3.0,
|
@@ -856,10 +861,10 @@ class StableDiffusionDiffEditPipeline(
|
|
856
861
|
target_negative_prompt (`str` or `List[str]`, *optional*):
|
857
862
|
The prompt or prompts to guide what to not include in image generation. If not defined, you need to
|
858
863
|
pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
|
859
|
-
target_prompt_embeds (`torch.FloatTensor`, *optional*):
|
864
|
+
target_prompt_embeds (`torch.Tensor`, *optional*):
|
860
865
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
861
866
|
provided, text embeddings are generated from the `prompt` input argument.
|
862
|
-
target_negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
867
|
+
target_negative_prompt_embeds (`torch.Tensor`, *optional*):
|
863
868
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
864
869
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
865
870
|
source_prompt (`str` or `List[str]`, *optional*):
|
@@ -868,11 +873,11 @@ class StableDiffusionDiffEditPipeline(
|
|
868
873
|
source_negative_prompt (`str` or `List[str]`, *optional*):
|
869
874
|
The prompt or prompts to guide semantic mask generation away from using DiffEdit. If not defined, you
|
870
875
|
need to pass `source_negative_prompt_embeds` or `source_image` instead.
|
871
|
-
source_prompt_embeds (`torch.FloatTensor`, *optional*):
|
876
|
+
source_prompt_embeds (`torch.Tensor`, *optional*):
|
872
877
|
Pre-generated text embeddings to guide the semantic mask generation. Can be used to easily tweak text
|
873
878
|
inputs (prompt weighting). If not provided, text embeddings are generated from `source_prompt` input
|
874
879
|
argument.
|
875
|
-
source_negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
880
|
+
source_negative_prompt_embeds (`torch.Tensor`, *optional*):
|
876
881
|
Pre-generated text embeddings to negatively guide the semantic mask generation. Can be used to easily
|
877
882
|
tweak text inputs (prompt weighting). If not provided, text embeddings are generated from
|
878
883
|
`source_negative_prompt` input argument.
|
@@ -1046,18 +1051,18 @@ class StableDiffusionDiffEditPipeline(
|
|
1046
1051
|
def invert(
|
1047
1052
|
self,
|
1048
1053
|
prompt: Optional[Union[str, List[str]]] = None,
|
1049
|
-
image: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
1054
|
+
image: Union[torch.Tensor, PIL.Image.Image] = None,
|
1050
1055
|
num_inference_steps: int = 50,
|
1051
1056
|
inpaint_strength: float = 0.8,
|
1052
1057
|
guidance_scale: float = 7.5,
|
1053
1058
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
1054
1059
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
1055
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
1056
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
1060
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
1061
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
1057
1062
|
decode_latents: bool = False,
|
1058
1063
|
output_type: Optional[str] = "pil",
|
1059
1064
|
return_dict: bool = True,
|
1060
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
1065
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
1061
1066
|
callback_steps: Optional[int] = 1,
|
1062
1067
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
1063
1068
|
lambda_auto_corr: float = 20.0,
|
@@ -1090,10 +1095,10 @@ class StableDiffusionDiffEditPipeline(
|
|
1090
1095
|
generator (`torch.Generator`, *optional*):
|
1091
1096
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
1092
1097
|
generation deterministic.
|
1093
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
1098
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
1094
1099
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
1095
1100
|
provided, text embeddings are generated from the `prompt` input argument.
|
1096
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
1101
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
1097
1102
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
1098
1103
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
1099
1104
|
decode_latents (`bool`, *optional*, defaults to `False`):
|
@@ -1106,7 +1111,7 @@ class StableDiffusionDiffEditPipeline(
|
|
1106
1111
|
plain tuple.
|
1107
1112
|
callback (`Callable`, *optional*):
|
1108
1113
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
1109
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
1114
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
1110
1115
|
callback_steps (`int`, *optional*, defaults to 1):
|
1111
1116
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
1112
1117
|
every step.
|
@@ -1284,8 +1289,8 @@ class StableDiffusionDiffEditPipeline(
|
|
1284
1289
|
def __call__(
|
1285
1290
|
self,
|
1286
1291
|
prompt: Optional[Union[str, List[str]]] = None,
|
1287
|
-
mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
1288
|
-
image_latents: Union[torch.FloatTensor, PIL.Image.Image] = None,
|
1292
|
+
mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
|
1293
|
+
image_latents: Union[torch.Tensor, PIL.Image.Image] = None,
|
1289
1294
|
inpaint_strength: Optional[float] = 0.8,
|
1290
1295
|
num_inference_steps: int = 50,
|
1291
1296
|
guidance_scale: float = 7.5,
|
@@ -1293,15 +1298,15 @@ class StableDiffusionDiffEditPipeline(
|
|
1293
1298
|
num_images_per_prompt: Optional[int] = 1,
|
1294
1299
|
eta: float = 0.0,
|
1295
1300
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
1296
|
-
latents: Optional[torch.FloatTensor] = None,
|
1297
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
1298
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
1301
|
+
latents: Optional[torch.Tensor] = None,
|
1302
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
1303
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
1299
1304
|
output_type: Optional[str] = "pil",
|
1300
1305
|
return_dict: bool = True,
|
1301
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
1306
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
1302
1307
|
callback_steps: int = 1,
|
1303
1308
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
1304
|
-
|
1309
|
+
clip_skip: int = None,
|
1305
1310
|
):
|
1306
1311
|
r"""
|
1307
1312
|
The call function to the pipeline for generation.
|
@@ -1314,7 +1319,7 @@ class StableDiffusionDiffEditPipeline(
|
|
1314
1319
|
repainted, while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
|
1315
1320
|
single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
|
1316
1321
|
instead of 3, so the expected shape would be `(B, 1, H, W)`.
|
1317
|
-
image_latents (`PIL.Image.Image` or `torch.FloatTensor`):
|
1322
|
+
image_latents (`PIL.Image.Image` or `torch.Tensor`):
|
1318
1323
|
Partially noised image latents from the inversion process to be used as inputs for image generation.
|
1319
1324
|
inpaint_strength (`float`, *optional*, defaults to 0.8):
|
1320
1325
|
Indicates extent to inpaint the masked area. Must be between 0 and 1. When `inpaint_strength` is 1, the
|
@@ -1338,14 +1343,14 @@ class StableDiffusionDiffEditPipeline(
|
|
1338
1343
|
generator (`torch.Generator`, *optional*):
|
1339
1344
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
1340
1345
|
generation deterministic.
|
1341
|
-
latents (`torch.FloatTensor`, *optional*):
|
1346
|
+
latents (`torch.Tensor`, *optional*):
|
1342
1347
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
1343
1348
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
1344
1349
|
tensor is generated by sampling using the supplied random `generator`.
|
1345
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
1350
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
1346
1351
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
1347
1352
|
provided, text embeddings are generated from the `prompt` input argument.
|
1348
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
1353
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
1349
1354
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
1350
1355
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
1351
1356
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -1355,7 +1360,7 @@ class StableDiffusionDiffEditPipeline(
|
|
1355
1360
|
plain tuple.
|
1356
1361
|
callback (`Callable`, *optional*):
|
1357
1362
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
1358
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
1363
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
1359
1364
|
callback_steps (`int`, *optional*, defaults to 1):
|
1360
1365
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
1361
1366
|
every step.
|
@@ -1423,7 +1428,7 @@ class StableDiffusionDiffEditPipeline(
|
|
1423
1428
|
prompt_embeds=prompt_embeds,
|
1424
1429
|
negative_prompt_embeds=negative_prompt_embeds,
|
1425
1430
|
lora_scale=text_encoder_lora_scale,
|
1426
|
-
clip_skip=
|
1431
|
+
clip_skip=clip_skip,
|
1427
1432
|
)
|
1428
1433
|
# For classifier free guidance, we need to do two forward passes.
|
1429
1434
|
# Here we concatenate the unconditional and text embeddings into a single batch
|