diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl
- diffusers/__init__.py +26 -1
- diffusers/configuration_utils.py +34 -29
- diffusers/dependency_versions_table.py +4 -0
- diffusers/image_processor.py +125 -12
- diffusers/loaders.py +169 -203
- diffusers/models/attention.py +24 -1
- diffusers/models/attention_flax.py +10 -5
- diffusers/models/attention_processor.py +3 -0
- diffusers/models/autoencoder_kl.py +114 -33
- diffusers/models/controlnet.py +131 -14
- diffusers/models/controlnet_flax.py +37 -26
- diffusers/models/cross_attention.py +17 -17
- diffusers/models/embeddings.py +67 -0
- diffusers/models/modeling_flax_utils.py +64 -56
- diffusers/models/modeling_utils.py +193 -104
- diffusers/models/prior_transformer.py +207 -37
- diffusers/models/resnet.py +26 -26
- diffusers/models/transformer_2d.py +36 -41
- diffusers/models/transformer_temporal.py +24 -21
- diffusers/models/unet_1d.py +31 -25
- diffusers/models/unet_2d.py +43 -30
- diffusers/models/unet_2d_blocks.py +210 -89
- diffusers/models/unet_2d_blocks_flax.py +12 -12
- diffusers/models/unet_2d_condition.py +172 -64
- diffusers/models/unet_2d_condition_flax.py +38 -24
- diffusers/models/unet_3d_blocks.py +34 -31
- diffusers/models/unet_3d_condition.py +101 -34
- diffusers/models/vae.py +5 -5
- diffusers/models/vae_flax.py +37 -34
- diffusers/models/vq_model.py +23 -14
- diffusers/pipelines/__init__.py +24 -1
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
- diffusers/pipelines/consistency_models/__init__.py +1 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
- diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/kandinsky/__init__.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
- diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
- diffusers/pipelines/pipeline_flax_utils.py +2 -2
- diffusers/pipelines/pipeline_utils.py +124 -146
- diffusers/pipelines/shap_e/__init__.py +27 -0
- diffusers/pipelines/shap_e/camera.py +147 -0
- diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
- diffusers/pipelines/shap_e/renderer.py +709 -0
- diffusers/pipelines/stable_diffusion/__init__.py +2 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
- diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
- diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
- diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
- diffusers/schedulers/__init__.py +3 -0
- diffusers/schedulers/scheduling_consistency_models.py +380 -0
- diffusers/schedulers/scheduling_ddim.py +28 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
- diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
- diffusers/schedulers/scheduling_ddpm.py +53 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
- diffusers/schedulers/scheduling_deis_multistep.py +66 -11
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
- diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
- diffusers/schedulers/scheduling_euler_discrete.py +58 -8
- diffusers/schedulers/scheduling_heun_discrete.py +89 -14
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
- diffusers/schedulers/scheduling_lms_discrete.py +57 -8
- diffusers/schedulers/scheduling_pndm.py +46 -10
- diffusers/schedulers/scheduling_repaint.py +19 -4
- diffusers/schedulers/scheduling_sde_ve.py +5 -1
- diffusers/schedulers/scheduling_unclip.py +43 -4
- diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
- diffusers/training_utils.py +1 -1
- diffusers/utils/__init__.py +2 -1
- diffusers/utils/dummy_pt_objects.py +60 -0
- diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
- diffusers/utils/hub_utils.py +1 -1
- diffusers/utils/import_utils.py +20 -3
- diffusers/utils/logging.py +15 -18
- diffusers/utils/outputs.py +3 -3
- diffusers/utils/testing_utils.py +15 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py

@@ -14,7 +14,6 @@
 
 
 import inspect
-import os
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
@@ -518,6 +517,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         prompt_embeds=None,
         negative_prompt_embeds=None,
         controlnet_conditioning_scale=1.0,
+        control_guidance_start=0.0,
+        control_guidance_end=1.0,
     ):
         if (callback_steps is None) or (
             callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
@@ -586,7 +587,7 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
                 raise ValueError("A single batch of multiple conditionings are supported at the moment.")
             elif len(image) != len(self.controlnet.nets):
                 raise ValueError(
-                    "For multiple controlnets: `image` must have the same length as the number of controlnets."
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
                 )
 
             for image_ in image:
@@ -620,6 +621,27 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         else:
             assert False
 
+        if len(control_guidance_start) != len(control_guidance_end):
+            raise ValueError(
+                f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
+            )
+
+        if isinstance(self.controlnet, MultiControlNetModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
+        for start, end in zip(control_guidance_start, control_guidance_end):
+            if start >= end:
+                raise ValueError(
+                    f"control guidance start: {start} cannot be larger or equal to control guidance end: {end}."
+                )
+            if start < 0.0:
+                raise ValueError(f"control guidance start: {start} can't be smaller than 0.")
+            if end > 1.0:
+                raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
+
     # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
     def check_image(self, image, prompt, prompt_embeds):
         image_is_pil = isinstance(image, PIL.Image.Image)
@@ -757,18 +779,6 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
 
         return latents
 
-    # override DiffusionPipeline
-    def save_pretrained(
-        self,
-        save_directory: Union[str, os.PathLike],
-        safe_serialization: bool = False,
-        variant: Optional[str] = None,
-    ):
-        if isinstance(self.controlnet, ControlNetModel):
-            super().save_pretrained(save_directory, safe_serialization, variant)
-        else:
-            raise NotImplementedError("Currently, the `save_pretrained()` is not implemented for Multi-ControlNet.")
-
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -809,6 +819,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.8,
         guess_mode: bool = False,
+        control_guidance_start: Union[float, List[float]] = 0.0,
+        control_guidance_end: Union[float, List[float]] = 1.0,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -889,6 +901,10 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
            guess_mode (`bool`, *optional*, defaults to `False`):
                In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
                you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
+           control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
+               The percentage of total steps at which the controlnet starts applying.
+           control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
+               The percentage of total steps at which the controlnet stops applying.
 
         Examples:
 
@@ -899,6 +915,19 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
            list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
            (nsfw) content, according to the `safety_checker`.
        """
+        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
+
+        # align format for control guidance
+        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
+            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
+                control_guidance_end
+            ]
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
@@ -908,6 +937,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
             prompt_embeds,
             negative_prompt_embeds,
             controlnet_conditioning_scale,
+            control_guidance_start,
+            control_guidance_end,
         )
 
         # 2. Define call parameters
@@ -1007,6 +1038,15 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
+        # 7.1 Create tensor stating which controlnets to keep
+        controlnet_keep = []
+        for i in range(len(timesteps)):
+            keeps = [
+                1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+                for s, e in zip(control_guidance_start, control_guidance_end)
+            ]
+            controlnet_keep.append(keeps[0] if len(keeps) == 1 else keeps)
+
         # 8. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
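The `controlnet_keep` block in section 7.1 above is the core of the new feature: it converts the `control_guidance_start`/`control_guidance_end` fractions into a per-step multiplier that later zeroes the conditioning scale outside the requested window. A minimal standalone sketch of the same formula (plain Python; the step count is illustrative):

    # Hedged sketch of the keep schedule from section 7.1 above: a controlnet is
    # "kept" at step i only while the step lies inside its [start, end] window,
    # both expressed as fractions of the total number of steps.
    def controlnet_keep_schedule(num_steps, starts, ends):
        schedule = []
        for i in range(num_steps):
            keeps = [
                1.0 - float(i / num_steps < s or (i + 1) / num_steps > e)
                for s, e in zip(starts, ends)
            ]
            schedule.append(keeps[0] if len(keeps) == 1 else keeps)
        return schedule

    # Single controlnet active only for the first half of a 10-step run:
    print(controlnet_keep_schedule(10, [0.0], [0.5]))
    # [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]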
@@ -1025,12 +1065,17 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
                 control_model_input = latent_model_input
                 controlnet_prompt_embeds = prompt_embeds
 
+                if isinstance(controlnet_keep[i], list):
+                    cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
+                else:
+                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,
                     t,
                     encoder_hidden_states=controlnet_prompt_embeds,
                     controlnet_cond=control_image,
-                    conditioning_scale=controlnet_conditioning_scale,
+                    conditioning_scale=cond_scale,
                     guess_mode=guess_mode,
                     return_dict=False,
                 )
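Together, these img2img hunks add two user-facing call arguments. A hedged usage sketch against the 0.18 API (the checkpoint names are real published repos, but the image URLs are illustrative placeholders):

    import torch
    from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
    from diffusers.utils import load_image

    controlnet = ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16
    )
    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
    ).to("cuda")

    init_image = load_image("https://example.com/init.png")      # placeholder URL
    control_image = load_image("https://example.com/canny.png")  # placeholder URL

    # New in 0.18: restrict ControlNet conditioning to the first 60% of the steps.
    image = pipe(
        "a photo of a cat",
        image=init_image,
        control_image=control_image,
        num_inference_steps=30,
        control_guidance_start=0.0,
        control_guidance_end=0.6,
    ).images[0]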
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py

@@ -15,7 +15,6 @@
 # This model implementation is heavily inspired by https://github.com/haofanwang/ControlNet-for-Diffusers/
 
 import inspect
-import os
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
@@ -647,6 +646,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         prompt_embeds=None,
         negative_prompt_embeds=None,
         controlnet_conditioning_scale=1.0,
+        control_guidance_start=0.0,
+        control_guidance_end=1.0,
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -718,7 +719,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
                 raise ValueError("A single batch of multiple conditionings are supported at the moment.")
             elif len(image) != len(self.controlnet.nets):
                 raise ValueError(
-                    "For multiple controlnets: `image` must have the same length as the number of controlnets."
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
                 )
 
             for image_ in image:
@@ -752,6 +753,27 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         else:
             assert False
 
+        if len(control_guidance_start) != len(control_guidance_end):
+            raise ValueError(
+                f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
+            )
+
+        if isinstance(self.controlnet, MultiControlNetModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
+        for start, end in zip(control_guidance_start, control_guidance_end):
+            if start >= end:
+                raise ValueError(
+                    f"control guidance start: {start} cannot be larger or equal to control guidance end: {end}."
+                )
+            if start < 0.0:
+                raise ValueError(f"control guidance start: {start} can't be smaller than 0.")
+            if end > 1.0:
+                raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
+
     # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
     def check_image(self, image, prompt, prompt_embeds):
         image_is_pil = isinstance(image, PIL.Image.Image)
@@ -957,18 +979,6 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
 
         return image_latents
 
-    # override DiffusionPipeline
-    def save_pretrained(
-        self,
-        save_directory: Union[str, os.PathLike],
-        safe_serialization: bool = False,
-        variant: Optional[str] = None,
-    ):
-        if isinstance(self.controlnet, ControlNetModel):
-            super().save_pretrained(save_directory, safe_serialization, variant)
-        else:
-            raise NotImplementedError("Currently, the `save_pretrained()` is not implemented for Multi-ControlNet.")
-
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -1003,6 +1013,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.5,
         guess_mode: bool = False,
+        control_guidance_start: Union[float, List[float]] = 0.0,
+        control_guidance_end: Union[float, List[float]] = 1.0,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -1086,6 +1098,10 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
            guess_mode (`bool`, *optional*, defaults to `False`):
                In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
                you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
+           control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
+               The percentage of total steps at which the controlnet starts applying.
+           control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
+               The percentage of total steps at which the controlnet stops applying.
 
         Examples:
 
@@ -1096,9 +1112,22 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
            list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
            (nsfw) content, according to the `safety_checker`.
        """
+        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
+
         # 0. Default height and width to unet
         height, width = self._default_height_width(height, width, image)
 
+        # align format for control guidance
+        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
+            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
+                control_guidance_end
+            ]
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
@@ -1110,6 +1139,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
             prompt_embeds,
             negative_prompt_embeds,
             controlnet_conditioning_scale,
+            control_guidance_start,
+            control_guidance_end,
         )
 
         # 2. Define call parameters
@@ -1126,8 +1157,6 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
-        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
-
         if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
             controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
 
@@ -1244,6 +1273,15 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
+        # 7.1 Create tensor stating which controlnets to keep
+        controlnet_keep = []
+        for i in range(len(timesteps)):
+            keeps = [
+                1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+                for s, e in zip(control_guidance_start, control_guidance_end)
+            ]
+            controlnet_keep.append(keeps[0] if len(keeps) == 1 else keeps)
+
         # 8. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -1262,12 +1300,17 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
                 control_model_input = latent_model_input
                 controlnet_prompt_embeds = prompt_embeds
 
+                if isinstance(controlnet_keep[i], list):
+                    cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
+                else:
+                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,
                     t,
                     encoder_hidden_states=controlnet_prompt_embeds,
                     controlnet_cond=control_image,
-                    conditioning_scale=controlnet_conditioning_scale,
+                    conditioning_scale=cond_scale,
                     guess_mode=guess_mode,
                     return_dict=False,
                 )
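The inpaint pipeline receives the identical treatment, and because both new arguments accept lists, a MultiControlNet setup can give each net its own active window and scale. A hedged sketch (checkpoints are real published repos; dummy PIL images keep the snippet self-contained, substitute real ones in practice):

    import torch
    from PIL import Image
    from diffusers import ControlNetModel, StableDiffusionControlNetInpaintPipeline

    controlnets = [
        ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16),
        ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16),
    ]
    pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting", controlnet=controlnets, torch_dtype=torch.float16
    ).to("cuda")

    # Dummy inputs so the sketch runs end to end.
    init_image = Image.new("RGB", (512, 512))
    mask = Image.new("L", (512, 512), 255)
    canny_image = Image.new("RGB", (512, 512))
    depth_image = Image.new("RGB", (512, 512))

    # One window per controlnet: canny guides the first half of the run,
    # depth the second half; conditioning scales are likewise per-net.
    result = pipe(
        "a red sofa in a living room",
        image=init_image,
        mask_image=mask,
        control_image=[canny_image, depth_image],
        controlnet_conditioning_scale=[0.8, 0.5],
        control_guidance_start=[0.0, 0.5],
        control_guidance_end=[0.5, 1.0],
    )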
diffusers/pipelines/controlnet/pipeline_flax_controlnet.py

@@ -464,7 +464,7 @@ class FlaxStableDiffusionControlNetPipeline(FlaxDiffusionPipeline):
 
         images_uint8_casted = np.asarray(images_uint8_casted).reshape(num_devices * batch_size, height, width, 3)
         images_uint8_casted, has_nsfw_concept = self._run_safety_checker(images_uint8_casted, safety_params, jit)
-        images = np.asarray(images)
+        images = np.array(images)
 
         # block images
         if any(has_nsfw_concept):
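The one-line Flax fix replaces `np.asarray` with `np.array`. `asarray` returns the input itself when it is already an ndarray, while `array` copies by default, so the change plausibly guarantees a writable buffer before the NSFW branch blacks out flagged images. A quick illustration of the difference:

    import numpy as np

    src = np.zeros((2, 2))
    view = np.asarray(src)   # no copy: same underlying buffer as src
    copy = np.array(src)     # independent copy by default

    view[0, 0] = 1.0
    copy[1, 1] = 1.0
    print(src[0, 0], src[1, 1])  # 1.0 0.0 -> only the asarray "view" wrote through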
diffusers/pipelines/kandinsky/__init__.py

@@ -15,5 +15,5 @@ else:
     from .pipeline_kandinsky import KandinskyPipeline
     from .pipeline_kandinsky_img2img import KandinskyImg2ImgPipeline
     from .pipeline_kandinsky_inpaint import KandinskyInpaintPipeline
-    from .pipeline_kandinsky_prior import KandinskyPriorPipeline
+    from .pipeline_kandinsky_prior import KandinskyPriorPipeline, KandinskyPriorPipelineOutput
     from .text_encoder import MultilingualCLIP
diffusers/pipelines/kandinsky/pipeline_kandinsky.py

@@ -22,7 +22,7 @@ from transformers import (
 from ...models import UNet2DConditionModel, VQModel
 from ...pipelines import DiffusionPipeline
 from ...pipelines.pipeline_utils import ImagePipelineOutput
-from ...schedulers import DDIMScheduler
+from ...schedulers import DDIMScheduler, DDPMScheduler
 from ...utils import (
     is_accelerate_available,
     is_accelerate_version,
@@ -88,7 +88,7 @@ class KandinskyPipeline(DiffusionPipeline):
            Frozen text-encoder.
        tokenizer ([`XLMRobertaTokenizer`]):
            Tokenizer of class
-        scheduler ([`DDIMScheduler`]):
+        scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
            A scheduler to be used in combination with `unet` to generate image latents.
        unet ([`UNet2DConditionModel`]):
            Conditional U-Net architecture to denoise the image embedding.
@@ -101,7 +101,7 @@ class KandinskyPipeline(DiffusionPipeline):
         text_encoder: MultilingualCLIP,
         tokenizer: XLMRobertaTokenizer,
         unet: UNet2DConditionModel,
-        scheduler: DDIMScheduler,
+        scheduler: Union[DDIMScheduler, DDPMScheduler],
         movq: VQModel,
     ):
         super().__init__()
@@ -115,6 +115,7 @@ class KandinskyPipeline(DiffusionPipeline):
         )
         self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -439,9 +440,6 @@ class KandinskyPipeline(DiffusionPipeline):
                 noise_pred,
                 t,
                 latents,
-                # YiYi notes: only reason this pipeline can't work with unclip scheduler is that can't pass down this argument
-                # need to use DDPM scheduler instead
-                # prev_timestep=prev_timestep,
                 generator=generator,
             ).prev_sample
         # post-processing
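With the annotation widened to `Union[DDIMScheduler, DDPMScheduler]` and the old "YiYi notes" workaround comment removed, the Kandinsky text-to-image pipeline can now run under DDPM. A hedged sketch using the standard config-swap idiom (the repo id is the published Kandinsky 2.1 checkpoint):

    import torch
    from diffusers import DDPMScheduler, KandinskyPipeline

    pipe = KandinskyPipeline.from_pretrained(
        "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
    )
    # New in 0.18: the scheduler slot accepts DDPM as well as DDIM, so the
    # usual config-swap idiom works here too.
    pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)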
diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py

@@ -275,6 +275,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
         )
         self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py

@@ -274,6 +274,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
 
         return KandinskyPriorPipelineOutput(image_embeds=image_emb, negative_image_embeds=zero_image_emb)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
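All three Kandinsky pipelines now tag `prepare_latents` as "Copied from" UnCLIP's helper; the hunks show only its first lines. A hedged reconstruction of the rest of the standard pattern (draw noise when no latents are passed, validate the shape otherwise, then pre-scale by the scheduler's `init_noise_sigma`):

    import torch
    from diffusers.utils import randn_tensor  # import location as of diffusers 0.18

    # Hedged reconstruction of the UnCLIP-style helper; the hunks above only
    # show the first three lines, the remainder follows the standard pattern.
    def prepare_latents(shape, dtype, device, generator, latents, scheduler):
        if latents is None:
            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
        else:
            if latents.shape != shape:
                raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
            latents = latents.to(device)
        # Schedulers expect the initial noise scaled by init_noise_sigma.
        latents = latents * scheduler.init_noise_sigma
        return latents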
diffusers/pipelines/kandinsky2_2/__init__.py

@@ -0,0 +1,7 @@
+from .pipeline_kandinsky2_2 import KandinskyV22Pipeline
+from .pipeline_kandinsky2_2_controlnet import KandinskyV22ControlnetPipeline
+from .pipeline_kandinsky2_2_controlnet_img2img import KandinskyV22ControlnetImg2ImgPipeline
+from .pipeline_kandinsky2_2_img2img import KandinskyV22Img2ImgPipeline
+from .pipeline_kandinsky2_2_inpainting import KandinskyV22InpaintPipeline
+from .pipeline_kandinsky2_2_prior import KandinskyV22PriorPipeline
+from .pipeline_kandinsky2_2_prior_emb2emb import KandinskyV22PriorEmb2EmbPipeline
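Kandinsky 2.2 keeps the two-stage design of 2.1: a prior pipeline maps text to CLIP image embeddings, then a decoder pipeline turns those embeddings into pixels. A hedged end-to-end sketch using the new classes (repo ids are the published kandinsky-community checkpoints):

    import torch
    from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline

    prior = KandinskyV22PriorPipeline.from_pretrained(
        "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
    ).to("cuda")
    decoder = KandinskyV22Pipeline.from_pretrained(
        "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
    ).to("cuda")

    # Stage 1: text -> CLIP image embeddings (positive and negative).
    image_embeds, negative_image_embeds = prior("a portrait of a fox, oil painting").to_tuple()

    # Stage 2: embeddings -> image.
    image = decoder(
        image_embeds=image_embeds,
        negative_image_embeds=negative_image_embeds,
        height=768,
        width=768,
    ).images[0]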