diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -180,8 +180,8 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
180
180
|
num_images_per_prompt,
|
181
181
|
do_classifier_free_guidance,
|
182
182
|
negative_prompt=None,
|
183
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
184
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
183
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
184
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
185
185
|
lora_scale: Optional[float] = None,
|
186
186
|
**kwargs,
|
187
187
|
):
|
@@ -213,8 +213,8 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
213
213
|
num_images_per_prompt,
|
214
214
|
do_classifier_free_guidance,
|
215
215
|
negative_prompt=None,
|
216
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
217
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
216
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
217
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
218
218
|
lora_scale: Optional[float] = None,
|
219
219
|
clip_skip: Optional[int] = None,
|
220
220
|
):
|
@@ -234,10 +234,10 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
234
234
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
235
235
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
236
236
|
less than `1`).
|
237
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
237
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
238
238
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
239
239
|
provided, text embeddings will be generated from `prompt` input argument.
|
240
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
240
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
241
241
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
242
242
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
243
243
|
argument.
|
@@ -469,14 +469,19 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
469
469
|
)
|
470
470
|
|
471
471
|
if len(gligen_phrases) != len(gligen_boxes):
|
472
|
-
ValueError(
|
472
|
+
raise ValueError(
|
473
473
|
"length of `gligen_phrases` and `gligen_boxes` has to be same, but"
|
474
474
|
f" got: `gligen_phrases` {len(gligen_phrases)} != `gligen_boxes` {len(gligen_boxes)}"
|
475
475
|
)
|
476
476
|
|
477
477
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
478
478
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
479
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
479
|
+
shape = (
|
480
|
+
batch_size,
|
481
|
+
num_channels_latents,
|
482
|
+
int(height) // self.vae_scale_factor,
|
483
|
+
int(width) // self.vae_scale_factor,
|
484
|
+
)
|
480
485
|
if isinstance(generator, list) and len(generator) != batch_size:
|
481
486
|
raise ValueError(
|
482
487
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -536,12 +541,12 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
536
541
|
num_images_per_prompt: Optional[int] = 1,
|
537
542
|
eta: float = 0.0,
|
538
543
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
539
|
-
latents: Optional[torch.FloatTensor] = None,
|
540
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
541
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
544
|
+
latents: Optional[torch.Tensor] = None,
|
545
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
546
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
542
547
|
output_type: Optional[str] = "pil",
|
543
548
|
return_dict: bool = True,
|
544
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
549
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
545
550
|
callback_steps: int = 1,
|
546
551
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
547
552
|
clip_skip: Optional[int] = None,
|
@@ -587,14 +592,14 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
587
592
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
588
593
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
589
594
|
generation deterministic.
|
590
|
-
latents (`torch.FloatTensor`, *optional*):
|
595
|
+
latents (`torch.Tensor`, *optional*):
|
591
596
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
592
597
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
593
598
|
tensor is generated by sampling using the supplied random `generator`.
|
594
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
599
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
595
600
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
596
601
|
provided, text embeddings are generated from the `prompt` input argument.
|
597
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
602
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
598
603
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
599
604
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
600
605
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -604,7 +609,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
604
609
|
plain tuple.
|
605
610
|
callback (`Callable`, *optional*):
|
606
611
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
607
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
612
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
608
613
|
callback_steps (`int`, *optional*, defaults to 1):
|
609
614
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
610
615
|
every step.
|
@@ -680,7 +685,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
680
685
|
timesteps = self.scheduler.timesteps
|
681
686
|
|
682
687
|
# 5. Prepare latent variables
|
683
|
-
num_channels_latents = self.unet.in_channels
|
688
|
+
num_channels_latents = self.unet.config.in_channels
|
684
689
|
latents = self.prepare_latents(
|
685
690
|
batch_size * num_images_per_prompt,
|
686
691
|
num_channels_latents,
|
@@ -713,7 +718,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
713
718
|
boxes = torch.zeros(max_objs, 4, device=device, dtype=self.text_encoder.dtype)
|
714
719
|
boxes[:n_objs] = torch.tensor(gligen_boxes)
|
715
720
|
text_embeddings = torch.zeros(
|
716
|
-
max_objs, self.unet.cross_attention_dim, device=device, dtype=self.text_encoder.dtype
|
721
|
+
max_objs, self.unet.config.cross_attention_dim, device=device, dtype=self.text_encoder.dtype
|
717
722
|
)
|
718
723
|
text_embeddings[:n_objs] = _text_embeddings
|
719
724
|
# Generate a mask for each object that is entity described by phrases
|
@@ -238,8 +238,8 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
|
|
238
238
|
num_images_per_prompt,
|
239
239
|
do_classifier_free_guidance,
|
240
240
|
negative_prompt=None,
|
241
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
242
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
241
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
242
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
243
243
|
lora_scale: Optional[float] = None,
|
244
244
|
clip_skip: Optional[int] = None,
|
245
245
|
):
|
@@ -259,10 +259,10 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
|
|
259
259
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
260
260
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
261
261
|
less than `1`).
|
262
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
262
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
263
263
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
264
264
|
provided, text embeddings will be generated from `prompt` input argument.
|
265
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
265
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
266
266
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
267
267
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
268
268
|
argument.
|
@@ -500,7 +500,12 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
|
|
500
500
|
|
501
501
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
502
502
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
503
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
503
|
+
shape = (
|
504
|
+
batch_size,
|
505
|
+
num_channels_latents,
|
506
|
+
int(height) // self.vae_scale_factor,
|
507
|
+
int(width) // self.vae_scale_factor,
|
508
|
+
)
|
504
509
|
if isinstance(generator, list) and len(generator) != batch_size:
|
505
510
|
raise ValueError(
|
506
511
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -700,12 +705,12 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
|
|
700
705
|
num_images_per_prompt: Optional[int] = 1,
|
701
706
|
eta: float = 0.0,
|
702
707
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
703
|
-
latents: Optional[torch.FloatTensor] = None,
|
704
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
705
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
708
|
+
latents: Optional[torch.Tensor] = None,
|
709
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
710
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
706
711
|
output_type: Optional[str] = "pil",
|
707
712
|
return_dict: bool = True,
|
708
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
713
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
709
714
|
callback_steps: int = 1,
|
710
715
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
711
716
|
gligen_normalize_constant: float = 28.7,
|
@@ -759,14 +764,14 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
|
|
759
764
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
760
765
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
761
766
|
generation deterministic.
|
762
|
-
latents (`torch.FloatTensor`, *optional*):
|
767
|
+
latents (`torch.Tensor`, *optional*):
|
763
768
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
764
769
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
765
770
|
tensor is generated by sampling using the supplied random `generator`.
|
766
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
771
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
767
772
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
768
773
|
provided, text embeddings are generated from the `prompt` input argument.
|
769
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
774
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
770
775
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
771
776
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
772
777
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -776,7 +781,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
|
|
776
781
|
plain tuple.
|
777
782
|
callback (`Callable`, *optional*):
|
778
783
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
779
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
784
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
780
785
|
callback_steps (`int`, *optional*, defaults to 1):
|
781
786
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
782
787
|
every step.
|
@@ -847,7 +852,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
|
|
847
852
|
timesteps = self.scheduler.timesteps
|
848
853
|
|
849
854
|
# 5. Prepare latent variables
|
850
|
-
num_channels_latents = self.unet.in_channels
|
855
|
+
num_channels_latents = self.unet.config.in_channels
|
851
856
|
latents = self.prepare_latents(
|
852
857
|
batch_size * num_images_per_prompt,
|
853
858
|
num_channels_latents,
|
@@ -154,8 +154,8 @@ class StableDiffusionKDiffusionPipeline(
|
|
154
154
|
num_images_per_prompt,
|
155
155
|
do_classifier_free_guidance,
|
156
156
|
negative_prompt=None,
|
157
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
158
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
157
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
158
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
159
159
|
lora_scale: Optional[float] = None,
|
160
160
|
**kwargs,
|
161
161
|
):
|
@@ -187,8 +187,8 @@ class StableDiffusionKDiffusionPipeline(
|
|
187
187
|
num_images_per_prompt,
|
188
188
|
do_classifier_free_guidance,
|
189
189
|
negative_prompt=None,
|
190
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
191
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
190
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
191
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
192
192
|
lora_scale: Optional[float] = None,
|
193
193
|
clip_skip: Optional[int] = None,
|
194
194
|
):
|
@@ -208,10 +208,10 @@ class StableDiffusionKDiffusionPipeline(
|
|
208
208
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
209
209
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
210
210
|
less than `1`).
|
211
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
211
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
212
212
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
213
213
|
provided, text embeddings will be generated from `prompt` input argument.
|
214
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
214
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
215
215
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
216
216
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
217
217
|
argument.
|
@@ -441,7 +441,12 @@ class StableDiffusionKDiffusionPipeline(
|
|
441
441
|
)
|
442
442
|
|
443
443
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
444
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
444
|
+
shape = (
|
445
|
+
batch_size,
|
446
|
+
num_channels_latents,
|
447
|
+
int(height) // self.vae_scale_factor,
|
448
|
+
int(width) // self.vae_scale_factor,
|
449
|
+
)
|
445
450
|
if latents is None:
|
446
451
|
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
|
447
452
|
else:
|
@@ -464,12 +469,12 @@ class StableDiffusionKDiffusionPipeline(
|
|
464
469
|
num_images_per_prompt: Optional[int] = 1,
|
465
470
|
eta: float = 0.0,
|
466
471
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
467
|
-
latents: Optional[torch.FloatTensor] = None,
|
468
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
469
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
472
|
+
latents: Optional[torch.Tensor] = None,
|
473
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
474
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
470
475
|
output_type: Optional[str] = "pil",
|
471
476
|
return_dict: bool = True,
|
472
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
477
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
473
478
|
callback_steps: int = 1,
|
474
479
|
use_karras_sigmas: Optional[bool] = False,
|
475
480
|
noise_sampler_seed: Optional[int] = None,
|
@@ -507,14 +512,14 @@ class StableDiffusionKDiffusionPipeline(
|
|
507
512
|
generator (`torch.Generator`, *optional*):
|
508
513
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
509
514
|
to make generation deterministic.
|
510
|
-
latents (`torch.FloatTensor`, *optional*):
|
515
|
+
latents (`torch.Tensor`, *optional*):
|
511
516
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
512
517
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
513
518
|
tensor will ge generated by sampling using the supplied random `generator`.
|
514
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
519
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
515
520
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
516
521
|
provided, text embeddings will be generated from `prompt` input argument.
|
517
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
522
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
518
523
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
519
524
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
520
525
|
argument.
|
@@ -526,7 +531,7 @@ class StableDiffusionKDiffusionPipeline(
|
|
526
531
|
plain tuple.
|
527
532
|
callback (`Callable`, *optional*):
|
528
533
|
A function that will be called every `callback_steps` steps during inference. The function will be
|
529
|
-
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
534
|
+
called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
530
535
|
callback_steps (`int`, *optional*, defaults to 1):
|
531
536
|
The frequency at which the `callback` function will be called. If not specified, the callback will be
|
532
537
|
called at every step.
|
diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
CHANGED
@@ -207,10 +207,10 @@ class StableDiffusionXLKDiffusionPipeline(
|
|
207
207
|
do_classifier_free_guidance: bool = True,
|
208
208
|
negative_prompt: Optional[str] = None,
|
209
209
|
negative_prompt_2: Optional[str] = None,
|
210
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
211
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
212
|
-
pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
213
|
-
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
210
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
211
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
212
|
+
pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
213
|
+
negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
214
214
|
lora_scale: Optional[float] = None,
|
215
215
|
clip_skip: Optional[int] = None,
|
216
216
|
):
|
@@ -236,17 +236,17 @@ class StableDiffusionXLKDiffusionPipeline(
|
|
236
236
|
negative_prompt_2 (`str` or `List[str]`, *optional*):
|
237
237
|
The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
|
238
238
|
`text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
|
239
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
239
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
240
240
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
241
241
|
provided, text embeddings will be generated from `prompt` input argument.
|
242
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
242
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
243
243
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
244
244
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
245
245
|
argument.
|
246
|
-
pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
246
|
+
pooled_prompt_embeds (`torch.Tensor`, *optional*):
|
247
247
|
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
248
248
|
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
249
|
-
negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
249
|
+
negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
|
250
250
|
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
251
251
|
weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
|
252
252
|
input argument.
|
@@ -497,7 +497,12 @@ class StableDiffusionXLKDiffusionPipeline(
|
|
497
497
|
)
|
498
498
|
|
499
499
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
500
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
500
|
+
shape = (
|
501
|
+
batch_size,
|
502
|
+
num_channels_latents,
|
503
|
+
int(height) // self.vae_scale_factor,
|
504
|
+
int(width) // self.vae_scale_factor,
|
505
|
+
)
|
501
506
|
if isinstance(generator, list) and len(generator) != batch_size:
|
502
507
|
raise ValueError(
|
503
508
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -579,11 +584,11 @@ class StableDiffusionXLKDiffusionPipeline(
|
|
579
584
|
negative_prompt_2: Optional[Union[str, List[str]]] = None,
|
580
585
|
num_images_per_prompt: Optional[int] = 1,
|
581
586
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
582
|
-
latents: Optional[torch.FloatTensor] = None,
|
583
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
584
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
585
|
-
pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
586
|
-
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
587
|
+
latents: Optional[torch.Tensor] = None,
|
588
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
589
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
590
|
+
pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
591
|
+
negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
587
592
|
output_type: Optional[str] = "pil",
|
588
593
|
return_dict: bool = True,
|
589
594
|
original_size: Optional[Tuple[int, int]] = None,
|
@@ -637,21 +642,21 @@ class StableDiffusionXLKDiffusionPipeline(
|
|
637
642
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
638
643
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
639
644
|
to make generation deterministic.
|
640
|
-
latents (`torch.FloatTensor`, *optional*):
|
645
|
+
latents (`torch.Tensor`, *optional*):
|
641
646
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
642
647
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
643
648
|
tensor will ge generated by sampling using the supplied random `generator`.
|
644
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
649
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
645
650
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
646
651
|
provided, text embeddings will be generated from `prompt` input argument.
|
647
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
652
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
648
653
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
649
654
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
650
655
|
argument.
|
651
|
-
pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
656
|
+
pooled_prompt_embeds (`torch.Tensor`, *optional*):
|
652
657
|
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
653
658
|
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
654
|
-
negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
659
|
+
negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
|
655
660
|
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
656
661
|
weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
|
657
662
|
input argument.
|
@@ -80,6 +80,7 @@ def retrieve_timesteps(
|
|
80
80
|
num_inference_steps: Optional[int] = None,
|
81
81
|
device: Optional[Union[str, torch.device]] = None,
|
82
82
|
timesteps: Optional[List[int]] = None,
|
83
|
+
sigmas: Optional[List[float]] = None,
|
83
84
|
**kwargs,
|
84
85
|
):
|
85
86
|
"""
|
@@ -90,19 +91,23 @@ def retrieve_timesteps(
|
|
90
91
|
scheduler (`SchedulerMixin`):
|
91
92
|
The scheduler to get timesteps from.
|
92
93
|
num_inference_steps (`int`):
|
93
|
-
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
94
|
-
|
94
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
|
95
|
+
must be `None`.
|
95
96
|
device (`str` or `torch.device`, *optional*):
|
96
97
|
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
97
98
|
timesteps (`List[int]`, *optional*):
|
98
|
-
|
99
|
-
|
100
|
-
|
99
|
+
Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
|
100
|
+
`num_inference_steps` and `sigmas` must be `None`.
|
101
|
+
sigmas (`List[float]`, *optional*):
|
102
|
+
Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
|
103
|
+
`num_inference_steps` and `timesteps` must be `None`.
|
101
104
|
|
102
105
|
Returns:
|
103
106
|
`Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
|
104
107
|
second element is the number of inference steps.
|
105
108
|
"""
|
109
|
+
if timesteps is not None and sigmas is not None:
|
110
|
+
raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
|
106
111
|
if timesteps is not None:
|
107
112
|
accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
108
113
|
if not accepts_timesteps:
|
@@ -113,6 +118,16 @@ def retrieve_timesteps(
|
|
113
118
|
scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
|
114
119
|
timesteps = scheduler.timesteps
|
115
120
|
num_inference_steps = len(timesteps)
|
121
|
+
elif sigmas is not None:
|
122
|
+
accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
123
|
+
if not accept_sigmas:
|
124
|
+
raise ValueError(
|
125
|
+
f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
|
126
|
+
f" sigmas schedules. Please check whether you are using the correct scheduler."
|
127
|
+
)
|
128
|
+
scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
|
129
|
+
timesteps = scheduler.timesteps
|
130
|
+
num_inference_steps = len(timesteps)
|
116
131
|
else:
|
117
132
|
scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
|
118
133
|
timesteps = scheduler.timesteps
|
@@ -239,8 +254,8 @@ class StableDiffusionLDM3DPipeline(
|
|
239
254
|
num_images_per_prompt,
|
240
255
|
do_classifier_free_guidance,
|
241
256
|
negative_prompt=None,
|
242
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
243
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
257
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
258
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
244
259
|
lora_scale: Optional[float] = None,
|
245
260
|
**kwargs,
|
246
261
|
):
|
@@ -272,8 +287,8 @@ class StableDiffusionLDM3DPipeline(
|
|
272
287
|
num_images_per_prompt,
|
273
288
|
do_classifier_free_guidance,
|
274
289
|
negative_prompt=None,
|
275
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
276
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
290
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
291
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
277
292
|
lora_scale: Optional[float] = None,
|
278
293
|
clip_skip: Optional[int] = None,
|
279
294
|
):
|
@@ -293,10 +308,10 @@ class StableDiffusionLDM3DPipeline(
|
|
293
308
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
294
309
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
295
310
|
less than `1`).
|
296
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
311
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
297
312
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
298
313
|
provided, text embeddings will be generated from `prompt` input argument.
|
299
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
314
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
300
315
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
301
316
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
302
317
|
argument.
|
@@ -627,7 +642,12 @@ class StableDiffusionLDM3DPipeline(
|
|
627
642
|
)
|
628
643
|
|
629
644
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
630
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
645
|
+
shape = (
|
646
|
+
batch_size,
|
647
|
+
num_channels_latents,
|
648
|
+
int(height) // self.vae_scale_factor,
|
649
|
+
int(width) // self.vae_scale_factor,
|
650
|
+
)
|
631
651
|
if isinstance(generator, list) and len(generator) != batch_size:
|
632
652
|
raise ValueError(
|
633
653
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -644,20 +664,22 @@ class StableDiffusionLDM3DPipeline(
|
|
644
664
|
return latents
|
645
665
|
|
646
666
|
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
647
|
-
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
667
|
+
def get_guidance_scale_embedding(
|
668
|
+
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
669
|
+
) -> torch.Tensor:
|
648
670
|
"""
|
649
671
|
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
650
672
|
|
651
673
|
Args:
|
652
|
-
|
653
|
-
|
674
|
+
w (`torch.Tensor`):
|
675
|
+
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
654
676
|
embedding_dim (`int`, *optional*, defaults to 512):
|
655
|
-
|
656
|
-
dtype:
|
657
|
-
|
677
|
+
Dimension of the embeddings to generate.
|
678
|
+
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
679
|
+
Data type of the generated embeddings.
|
658
680
|
|
659
681
|
Returns:
|
660
|
-
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
682
|
+
`torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
661
683
|
"""
|
662
684
|
assert len(w.shape) == 1
|
663
685
|
w = w * 1000.0
|
@@ -712,16 +734,17 @@ class StableDiffusionLDM3DPipeline(
|
|
712
734
|
width: Optional[int] = None,
|
713
735
|
num_inference_steps: int = 49,
|
714
736
|
timesteps: List[int] = None,
|
737
|
+
sigmas: List[float] = None,
|
715
738
|
guidance_scale: float = 5.0,
|
716
739
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
717
740
|
num_images_per_prompt: Optional[int] = 1,
|
718
741
|
eta: float = 0.0,
|
719
742
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
720
|
-
latents: Optional[torch.FloatTensor] = None,
|
721
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
722
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
743
|
+
latents: Optional[torch.Tensor] = None,
|
744
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
745
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
723
746
|
ip_adapter_image: Optional[PipelineImageInput] = None,
|
724
|
-
ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
|
747
|
+
ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
|
725
748
|
output_type: Optional[str] = "pil",
|
726
749
|
return_dict: bool = True,
|
727
750
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
@@ -744,6 +767,14 @@ class StableDiffusionLDM3DPipeline(
|
|
744
767
|
num_inference_steps (`int`, *optional*, defaults to 50):
|
745
768
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
746
769
|
expense of slower inference.
|
770
|
+
timesteps (`List[int]`, *optional*):
|
771
|
+
Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
|
772
|
+
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
773
|
+
passed will be used. Must be in descending order.
|
774
|
+
sigmas (`List[float]`, *optional*):
|
775
|
+
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
|
776
|
+
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
777
|
+
will be used.
|
747
778
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
748
779
|
A higher guidance scale value encourages the model to generate images closely linked to the text
|
749
780
|
`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
|
@@ -758,23 +789,23 @@ class StableDiffusionLDM3DPipeline(
|
|
758
789
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
759
790
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
760
791
|
generation deterministic.
|
761
|
-
latents (`torch.FloatTensor`, *optional*):
|
792
|
+
latents (`torch.Tensor`, *optional*):
|
762
793
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
763
794
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
764
795
|
tensor is generated by sampling using the supplied random `generator`.
|
765
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
796
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
766
797
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
767
798
|
provided, text embeddings are generated from the `prompt` input argument.
|
768
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
799
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
769
800
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
770
801
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
771
802
|
ip_adapter_image: (`PipelineImageInput`, *optional*):
|
772
803
|
Optional image input to work with IP Adapters.
|
773
|
-
ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
|
774
|
-
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
|
775
|
-
Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
|
776
|
-
if `do_classifier_free_guidance` is set to `True`.
|
777
|
-
|
804
|
+
ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
|
805
|
+
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
806
|
+
IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
807
|
+
contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
|
808
|
+
provided, embeddings are computed from the `ip_adapter_image` input argument.
|
778
809
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
779
810
|
The output format of the generated image. Choose between `PIL.Image` or `np.array`.
|
780
811
|
return_dict (`bool`, *optional*, defaults to `True`):
|
@@ -881,7 +912,9 @@ class StableDiffusionLDM3DPipeline(
|
|
881
912
|
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
|
882
913
|
|
883
914
|
# 4. Prepare timesteps
|
884
|
-
timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
|
915
|
+
timesteps, num_inference_steps = retrieve_timesteps(
|
916
|
+
self.scheduler, num_inference_steps, device, timesteps, sigmas
|
917
|
+
)
|
885
918
|
|
886
919
|
# 5. Prepare latent variables
|
887
920
|
num_channels_latents = self.unet.config.in_channels
|