diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py (+20 -33)

@@ -8,7 +8,6 @@ from ...models import Kandinsky3UNet, VQModel
 from ...schedulers import DDPMScheduler
 from ...utils import (
     deprecate,
-    is_accelerate_available,
     logging,
     replace_example_docstring,
 )
@@ -24,7 +23,9 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers import AutoPipelineForText2Image
         >>> import torch

-        >>> pipe = AutoPipelineForText2Image.from_pretrained("kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16)
+        >>> pipe = AutoPipelineForText2Image.from_pretrained(
+        ...     "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
+        ... )
         >>> pipe.enable_model_cpu_offload()

         >>> prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."
@@ -70,20 +71,6 @@ class Kandinsky3Pipeline(DiffusionPipeline, LoraLoaderMixin):
             tokenizer=tokenizer, text_encoder=text_encoder, unet=unet, scheduler=scheduler, movq=movq
         )

-    def remove_all_hooks(self):
-        if is_accelerate_available():
-            from accelerate.hooks import remove_hook_from_module
-        else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
-
-        for model in [self.text_encoder, self.unet, self.movq]:
-            if model is not None:
-                remove_hook_from_module(model, recurse=True)
-
-        self.unet_offload_hook = None
-        self.text_encoder_offload_hook = None
-        self.final_offload_hook = None
-
     def process_embeds(self, embeddings, attention_mask, cut_context):
         if cut_context:
             embeddings[attention_mask == 0] = torch.zeros_like(embeddings[attention_mask == 0])
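Example: both Kandinsky 3 pipelines drop their bespoke `remove_all_hooks` helper in this release (see also the matching hunk in the img2img file below). A minimal sketch of the presumed replacement, under the assumption (not stated in this diff) that the inherited `DiffusionPipeline.remove_all_hooks` covers the same accelerate-hook cleanup; the model id is taken from the docstring above:

# Assumption: the generic base-class method detaches the accelerate offload
# hooks that the deleted per-pipeline helper used to remove.
import torch
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()  # installs accelerate hooks on the submodules
pipe.remove_all_hooks()          # inherited DiffusionPipeline method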
@@ -100,11 +87,11 @@ class Kandinsky3Pipeline(DiffusionPipeline, LoraLoaderMixin):
         num_images_per_prompt=1,
         device=None,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         _cut_context=False,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        negative_attention_mask: Optional[torch.FloatTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        negative_attention_mask: Optional[torch.Tensor] = None,
     ):
         r"""
         Encodes the prompt into text encoder hidden states.
@@ -122,16 +109,16 @@ class Kandinsky3Pipeline(DiffusionPipeline, LoraLoaderMixin):
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
                 Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
-            attention_mask (`torch.FloatTensor`, *optional*):
+            attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated attention mask. Must provide if passing `prompt_embeds` directly.
-            negative_attention_mask (`torch.FloatTensor`, *optional*):
+            negative_attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated negative attention mask. Must provide if passing `negative_prompt_embeds` directly.
         """
         if prompt is not None and negative_prompt is not None:
@@ -347,10 +334,10 @@ class Kandinsky3Pipeline(DiffusionPipeline, LoraLoaderMixin):
         height: Optional[int] = 1024,
         width: Optional[int] = 1024,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        negative_attention_mask: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        negative_attention_mask: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         latents=None,
@@ -393,16 +380,16 @@ class Kandinsky3Pipeline(DiffusionPipeline, LoraLoaderMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
-            attention_mask (`torch.FloatTensor`, *optional*):
+            attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated attention mask. Must provide if passing `prompt_embeds` directly.
-            negative_attention_mask (`torch.FloatTensor`, *optional*):
+            negative_attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated negative attention mask. Must provide if passing `negative_prompt_embeds` directly.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generate image. Choose between
@@ -411,7 +398,7 @@ class Kandinsky3Pipeline(DiffusionPipeline, LoraLoaderMixin):
                 Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
             callback (`Callable`, *optional*):
                 A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py (+24 -35)

@@ -12,7 +12,6 @@ from ...models import Kandinsky3UNet, VQModel
 from ...schedulers import DDPMScheduler
 from ...utils import (
     deprecate,
-    is_accelerate_available,
     logging,
     replace_example_docstring,
 )
@@ -29,11 +28,15 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers.utils import load_image
         >>> import torch

-        >>> pipe = AutoPipelineForImage2Image.from_pretrained("kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16)
+        >>> pipe = AutoPipelineForImage2Image.from_pretrained(
+        ...     "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
+        ... )
         >>> pipe.enable_model_cpu_offload()

         >>> prompt = "A painting of the inside of a subway train with tiny raccoons."
-        >>> image = load_image("https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky3/t2i.png")
+        >>> image = load_image(
+        ...     "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky3/t2i.png"
+        ... )

         >>> generator = torch.Generator(device="cpu").manual_seed(0)
         >>> image = pipe(prompt, image=image, strength=0.75, num_inference_steps=25, generator=generator).images[0]
@@ -92,20 +95,6 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):

         return timesteps, num_inference_steps - t_start

-    def remove_all_hooks(self):
-        if is_accelerate_available():
-            from accelerate.hooks import remove_hook_from_module
-        else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
-
-        for model in [self.text_encoder, self.unet]:
-            if model is not None:
-                remove_hook_from_module(model, recurse=True)
-
-        self.unet_offload_hook = None
-        self.text_encoder_offload_hook = None
-        self.final_offload_hook = None
-
     def _process_embeds(self, embeddings, attention_mask, cut_context):
         # return embeddings, attention_mask
         if cut_context:
@@ -123,11 +112,11 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
         num_images_per_prompt=1,
         device=None,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         _cut_context=False,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        negative_attention_mask: Optional[torch.FloatTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        negative_attention_mask: Optional[torch.Tensor] = None,
     ):
         r"""
         Encodes the prompt into text encoder hidden states.
@@ -145,16 +134,16 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
                 Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
-            attention_mask (`torch.FloatTensor`, *optional*):
+            attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated attention mask. Must provide if passing `prompt_embeds` directly.
-            negative_attention_mask (`torch.FloatTensor`, *optional*):
+            negative_attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated negative attention mask. Must provide if passing `negative_prompt_embeds` directly.
         """
         if prompt is not None and negative_prompt is not None:
@@ -414,17 +403,17 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
     def __call__(
         self,
         prompt: Union[str, List[str]] = None,
-        image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]] = None,
+        image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]] = None,
         strength: float = 0.3,
         num_inference_steps: int = 25,
         guidance_scale: float = 3.0,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         num_images_per_prompt: Optional[int] = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        negative_attention_mask: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        negative_attention_mask: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -438,7 +427,7 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                 instead.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, or tensor representing an image batch, that will be used as the starting point for the
                 process.
             strength (`float`, *optional*, defaults to 0.8):
@@ -465,16 +454,16 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
-            attention_mask (`torch.FloatTensor`, *optional*):
+            attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated attention mask. Must provide if passing `prompt_embeds` directly.
-            negative_attention_mask (`torch.FloatTensor`, *optional*):
+            negative_attention_mask (`torch.Tensor`, *optional*):
                 Pre-generated negative attention mask. Must provide if passing `negative_prompt_embeds` directly.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generate image. Choose between
diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py (+48 -30)

@@ -63,6 +63,7 @@ def retrieve_timesteps(
     num_inference_steps: Optional[int] = None,
     device: Optional[Union[str, torch.device]] = None,
     timesteps: Optional[List[int]] = None,
+    sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
     """
@@ -73,19 +74,23 @@ def retrieve_timesteps(
         scheduler (`SchedulerMixin`):
             The scheduler to get timesteps from.
         num_inference_steps (`int`):
-            The number of diffusion steps used when generating samples with a pre-trained model. If used,
-            `timesteps` must be `None`.
+            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
+            must be `None`.
         device (`str` or `torch.device`, *optional*):
             The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         timesteps (`List[int]`, *optional*):
-            Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
-            timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
-            must be `None`.
+            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
+            `num_inference_steps` and `sigmas` must be `None`.
+        sigmas (`List[float]`, *optional*):
+            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
+            `num_inference_steps` and `timesteps` must be `None`.

     Returns:
         `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
         second element is the number of inference steps.
     """
+    if timesteps is not None and sigmas is not None:
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
     if timesteps is not None:
         accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
         if not accepts_timesteps:
@@ -96,6 +101,16 @@ def retrieve_timesteps(
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         timesteps = scheduler.timesteps
         num_inference_steps = len(timesteps)
+    elif sigmas is not None:
+        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
+        if not accept_sigmas:
+            raise ValueError(
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" sigmas schedules. Please check whether you are using the correct scheduler."
+            )
+        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
+        timesteps = scheduler.timesteps
+        num_inference_steps = len(timesteps)
     else:
         scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
         timesteps = scheduler.timesteps
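Example: the two hunks above give `retrieve_timesteps` a third, mutually exclusive scheduling input. Exactly one of `num_inference_steps`, `timesteps`, or `sigmas` may be set, and a scheduler whose `set_timesteps` lacks a `sigmas` parameter raises a `ValueError`. A sketch of driving the new branch directly; the scheduler choice and sigma values are illustrative assumptions (the file list above shows `scheduling_euler_discrete.py` also changing in this release):

from diffusers import EulerDiscreteScheduler
from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_img2img import (
    retrieve_timesteps,
)

scheduler = EulerDiscreteScheduler.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="scheduler"
)

# Custom sigma schedule; `num_inference_steps` and `timesteps` must stay None.
custom_sigmas = [14.6, 6.5, 3.9, 2.7, 1.9, 1.4, 1.0, 0.65, 0.4, 0.15]
timesteps, num_inference_steps = retrieve_timesteps(scheduler, sigmas=custom_sigmas, device="cpu")
# Passing `timesteps` and `sigmas` together would raise the new ValueError.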
@@ -222,8 +237,8 @@ class LatentConsistencyModelImg2ImgPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -243,10 +258,10 @@ class LatentConsistencyModelImg2ImgPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -548,20 +563,22 @@ class LatentConsistencyModelImg2ImgPipeline(
         return latents

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.Tensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -611,7 +628,7 @@ class LatentConsistencyModelImg2ImgPipeline(
         prompt: Union[str, List[str]],
         strength: float,
         callback_steps: int,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image=None,
         ip_adapter_image_embeds=None,
         callback_on_step_end_tensor_inputs=None,
@@ -692,10 +709,10 @@ class LatentConsistencyModelImg2ImgPipeline(
         guidance_scale: float = 8.5,
         num_images_per_prompt: Optional[int] = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -737,20 +754,20 @@ class LatentConsistencyModelImg2ImgPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*):
                 Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
@@ -870,9 +887,10 @@ class LatentConsistencyModelImg2ImgPipeline(
             else self.scheduler.config.original_inference_steps
         )
         latent_timestep = timesteps[:1]
-        latents = self.prepare_latents(
-            image, latent_timestep, batch_size, num_images_per_prompt, prompt_embeds.dtype, device, generator
-        )
+        if latents is None:
+            latents = self.prepare_latents(
+                image, latent_timestep, batch_size, num_images_per_prompt, prompt_embeds.dtype, device, generator
+            )
         bs = batch_size * num_images_per_prompt

         # 6. Get Guidance Scale Embedding
diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py (+50 -28)

@@ -67,6 +67,7 @@ def retrieve_timesteps(
     num_inference_steps: Optional[int] = None,
     device: Optional[Union[str, torch.device]] = None,
     timesteps: Optional[List[int]] = None,
+    sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
     """
@@ -77,19 +78,23 @@ def retrieve_timesteps(
         scheduler (`SchedulerMixin`):
             The scheduler to get timesteps from.
         num_inference_steps (`int`):
-            The number of diffusion steps used when generating samples with a pre-trained model. If used,
-            `timesteps` must be `None`.
+            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
+            must be `None`.
         device (`str` or `torch.device`, *optional*):
             The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         timesteps (`List[int]`, *optional*):
-            Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
-            timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
-            must be `None`.
+            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
+            `num_inference_steps` and `sigmas` must be `None`.
+        sigmas (`List[float]`, *optional*):
+            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
+            `num_inference_steps` and `timesteps` must be `None`.

     Returns:
         `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
         second element is the number of inference steps.
     """
+    if timesteps is not None and sigmas is not None:
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
     if timesteps is not None:
         accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
         if not accepts_timesteps:
@@ -100,6 +105,16 @@ def retrieve_timesteps(
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         timesteps = scheduler.timesteps
         num_inference_steps = len(timesteps)
+    elif sigmas is not None:
+        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
+        if not accept_sigmas:
+            raise ValueError(
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" sigmas schedules. Please check whether you are using the correct scheduler."
+            )
+        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
+        timesteps = scheduler.timesteps
+        num_inference_steps = len(timesteps)
     else:
         scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
         timesteps = scheduler.timesteps
@@ -206,8 +221,8 @@ class LatentConsistencyModelPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -227,10 +242,10 @@ class LatentConsistencyModelPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -474,7 +489,12 @@ class LatentConsistencyModelPipeline(

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
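Example: the `int()` casts added to `prepare_latents` above guard against float `height`/`width` values. Python's `//` on a float returns a float, which would land in the latent `shape` tuple and be rejected by `torch.randn`. A two-line illustration:

height, vae_scale_factor = 768.0, 8
print(height // vae_scale_factor)       # 96.0 -> float, invalid as a tensor dimension
print(int(height) // vae_scale_factor)  # 96   -> int, valid shape entry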
@@ -490,20 +510,22 @@ class LatentConsistencyModelPipeline(
         latents = latents * self.scheduler.init_noise_sigma
         return latents

-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.Tensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
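Example: only the first two body lines of `get_guidance_scale_embedding` appear as context above. For reference, a self-contained re-implementation of the documented behavior (an illustration consistent with the docstring, not the verbatim library source): each of the `len(w)` guidance scales is mapped to a sinusoidal vector of size `embedding_dim`, zero-padded by one column when the dimension is odd.

import torch

def get_guidance_scale_embedding(w, embedding_dim=512, dtype=torch.float32):
    assert len(w.shape) == 1
    w = w * 1000.0
    half_dim = embedding_dim // 2
    emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
    emb = w.to(dtype)[:, None] * emb[None, :]          # (len(w), half_dim)
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
    if embedding_dim % 2 == 1:
        emb = torch.nn.functional.pad(emb, (0, 1))     # zero-pad odd dims
    assert emb.shape == (w.shape[0], embedding_dim)
    return emb

print(get_guidance_scale_embedding(torch.tensor([7.5]), embedding_dim=8).shape)  # torch.Size([1, 8])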
@@ -543,7 +565,7 @@ class LatentConsistencyModelPipeline(
         height: int,
         width: int,
         callback_steps: int,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image=None,
         ip_adapter_image_embeds=None,
         callback_on_step_end_tensor_inputs=None,
@@ -624,10 +646,10 @@ class LatentConsistencyModelPipeline(
         guidance_scale: float = 8.5,
         num_images_per_prompt: Optional[int] = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -669,20 +691,20 @@ class LatentConsistencyModelPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*):
                 Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):