diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl

Files changed (120)
  1. diffusers/__init__.py +26 -1
  2. diffusers/configuration_utils.py +34 -29
  3. diffusers/dependency_versions_table.py +4 -0
  4. diffusers/image_processor.py +125 -12
  5. diffusers/loaders.py +169 -203
  6. diffusers/models/attention.py +24 -1
  7. diffusers/models/attention_flax.py +10 -5
  8. diffusers/models/attention_processor.py +3 -0
  9. diffusers/models/autoencoder_kl.py +114 -33
  10. diffusers/models/controlnet.py +131 -14
  11. diffusers/models/controlnet_flax.py +37 -26
  12. diffusers/models/cross_attention.py +17 -17
  13. diffusers/models/embeddings.py +67 -0
  14. diffusers/models/modeling_flax_utils.py +64 -56
  15. diffusers/models/modeling_utils.py +193 -104
  16. diffusers/models/prior_transformer.py +207 -37
  17. diffusers/models/resnet.py +26 -26
  18. diffusers/models/transformer_2d.py +36 -41
  19. diffusers/models/transformer_temporal.py +24 -21
  20. diffusers/models/unet_1d.py +31 -25
  21. diffusers/models/unet_2d.py +43 -30
  22. diffusers/models/unet_2d_blocks.py +210 -89
  23. diffusers/models/unet_2d_blocks_flax.py +12 -12
  24. diffusers/models/unet_2d_condition.py +172 -64
  25. diffusers/models/unet_2d_condition_flax.py +38 -24
  26. diffusers/models/unet_3d_blocks.py +34 -31
  27. diffusers/models/unet_3d_condition.py +101 -34
  28. diffusers/models/vae.py +5 -5
  29. diffusers/models/vae_flax.py +37 -34
  30. diffusers/models/vq_model.py +23 -14
  31. diffusers/pipelines/__init__.py +24 -1
  32. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
  33. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
  34. diffusers/pipelines/consistency_models/__init__.py +1 -0
  35. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
  36. diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
  37. diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
  38. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
  39. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
  40. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  41. diffusers/pipelines/kandinsky/__init__.py +1 -1
  42. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
  43. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
  44. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
  45. diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
  46. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
  47. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
  48. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
  49. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
  50. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
  51. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
  52. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
  53. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  54. diffusers/pipelines/pipeline_utils.py +124 -146
  55. diffusers/pipelines/shap_e/__init__.py +27 -0
  56. diffusers/pipelines/shap_e/camera.py +147 -0
  57. diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
  58. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
  59. diffusers/pipelines/shap_e/renderer.py +709 -0
  60. diffusers/pipelines/stable_diffusion/__init__.py +2 -0
  61. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
  65. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  66. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
  67. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  68. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
  69. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
  70. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
  71. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
  72. diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
  73. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
  74. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
  75. diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
  76. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
  77. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
  78. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
  79. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
  80. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  81. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
  82. diffusers/schedulers/__init__.py +3 -0
  83. diffusers/schedulers/scheduling_consistency_models.py +380 -0
  84. diffusers/schedulers/scheduling_ddim.py +28 -6
  85. diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
  86. diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
  87. diffusers/schedulers/scheduling_ddpm.py +53 -7
  88. diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
  89. diffusers/schedulers/scheduling_deis_multistep.py +66 -11
  90. diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
  91. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
  92. diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
  93. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
  94. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
  95. diffusers/schedulers/scheduling_euler_discrete.py +58 -8
  96. diffusers/schedulers/scheduling_heun_discrete.py +89 -14
  97. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
  98. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
  99. diffusers/schedulers/scheduling_lms_discrete.py +57 -8
  100. diffusers/schedulers/scheduling_pndm.py +46 -10
  101. diffusers/schedulers/scheduling_repaint.py +19 -4
  102. diffusers/schedulers/scheduling_sde_ve.py +5 -1
  103. diffusers/schedulers/scheduling_unclip.py +43 -4
  104. diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
  105. diffusers/training_utils.py +1 -1
  106. diffusers/utils/__init__.py +2 -1
  107. diffusers/utils/dummy_pt_objects.py +60 -0
  108. diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
  109. diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
  110. diffusers/utils/hub_utils.py +1 -1
  111. diffusers/utils/import_utils.py +20 -3
  112. diffusers/utils/logging.py +15 -18
  113. diffusers/utils/outputs.py +3 -3
  114. diffusers/utils/testing_utils.py +15 -0
  115. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
  116. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
  117. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
  118. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
  119. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
  120. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
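The bulk of the new surface area in 0.18.x is new pipelines (consistency models, Kandinsky 2.2, Shap-E, Stable Diffusion XL, LDM3D, ParaDiGMS) and their schedulers, exported from the top-level diffusers namespace. A minimal, hedged sketch of trying two of them; the checkpoint ids are illustrative, and the SDXL pipelines additionally require the invisible-watermark package:

# Sketch only: the Hub checkpoint ids below are illustrative, and weight downloads
# plus GPU placement are assumed to be available.
import torch
from diffusers import ConsistencyModelPipeline, StableDiffusionXLPipeline

# New in 0.18.x: one-step / few-step consistency-model sampling.
cm_pipe = ConsistencyModelPipeline.from_pretrained("openai/diffusers-cd_imagenet64_l2")
cm_image = cm_pipe(num_inference_steps=1).images[0]

# New in 0.18.x: Stable Diffusion XL text-to-image (needs `invisible-watermark`).
sdxl_pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16
).to("cuda")
sdxl_image = sdxl_pipe("an astronaut riding a horse, oil painting").images[0]

The per-file hunks that follow cover the ControlNet img2img/inpaint pipelines, the Flax ControlNet pipeline, and the Kandinsky pipelines.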
diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -14,7 +14,6 @@
 
 
 import inspect
-import os
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
@@ -518,6 +517,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         prompt_embeds=None,
         negative_prompt_embeds=None,
         controlnet_conditioning_scale=1.0,
+        control_guidance_start=0.0,
+        control_guidance_end=1.0,
     ):
         if (callback_steps is None) or (
             callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
@@ -586,7 +587,7 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
                 raise ValueError("A single batch of multiple conditionings are supported at the moment.")
             elif len(image) != len(self.controlnet.nets):
                 raise ValueError(
-                    "For multiple controlnets: `image` must have the same length as the number of controlnets."
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
                 )
 
             for image_ in image:
@@ -620,6 +621,27 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         else:
             assert False
 
+        if len(control_guidance_start) != len(control_guidance_end):
+            raise ValueError(
+                f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
+            )
+
+        if isinstance(self.controlnet, MultiControlNetModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
+        for start, end in zip(control_guidance_start, control_guidance_end):
+            if start >= end:
+                raise ValueError(
+                    f"control guidance start: {start} cannot be larger or equal to control guidance end: {end}."
+                )
+            if start < 0.0:
+                raise ValueError(f"control guidance start: {start} can't be smaller than 0.")
+            if end > 1.0:
+                raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
+
     # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
     def check_image(self, image, prompt, prompt_embeds):
         image_is_pil = isinstance(image, PIL.Image.Image)
@@ -757,18 +779,6 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
 
         return latents
 
-    # override DiffusionPipeline
-    def save_pretrained(
-        self,
-        save_directory: Union[str, os.PathLike],
-        safe_serialization: bool = False,
-        variant: Optional[str] = None,
-    ):
-        if isinstance(self.controlnet, ControlNetModel):
-            super().save_pretrained(save_directory, safe_serialization, variant)
-        else:
-            raise NotImplementedError("Currently, the `save_pretrained()` is not implemented for Multi-ControlNet.")
-
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -809,6 +819,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.8,
         guess_mode: bool = False,
+        control_guidance_start: Union[float, List[float]] = 0.0,
+        control_guidance_end: Union[float, List[float]] = 1.0,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -889,6 +901,10 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
             guess_mode (`bool`, *optional*, defaults to `False`):
                 In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
                 you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
+            control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
+                The percentage of total steps at which the controlnet starts applying.
+            control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
+                The percentage of total steps at which the controlnet stops applying.
 
         Examples:
 
@@ -899,6 +915,19 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
+
+        # align format for control guidance
+        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
+            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
+                control_guidance_end
+            ]
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
@@ -908,6 +937,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
             prompt_embeds,
             negative_prompt_embeds,
             controlnet_conditioning_scale,
+            control_guidance_start,
+            control_guidance_end,
         )
 
         # 2. Define call parameters
@@ -1007,6 +1038,15 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
+        # 7.1 Create tensor stating which controlnets to keep
+        controlnet_keep = []
+        for i in range(len(timesteps)):
+            keeps = [
+                1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+                for s, e in zip(control_guidance_start, control_guidance_end)
+            ]
+            controlnet_keep.append(keeps[0] if len(keeps) == 1 else keeps)
+
         # 8. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
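The `controlnet_keep` list built in step 7.1 above is what turns `control_guidance_start` / `control_guidance_end` into a per-step mask. A standalone sketch of the same expression with assumed values (ten steps, one ControlNet active only between 20% and 80% of the schedule):

# Illustration of the keep-mask expression from the hunk above; the step count and
# guidance window are assumed values, not taken from the pipeline.
num_steps = 10
control_guidance_start, control_guidance_end = 0.2, 0.8

controlnet_keep = [
    1.0 - float(i / num_steps < control_guidance_start or (i + 1) / num_steps > control_guidance_end)
    for i in range(num_steps)
]
print(controlnet_keep)
# [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]

A 0.0 entry later multiplies the conditioning scale down to zero for that step, so the ControlNet residuals are effectively disabled outside the window.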
@@ -1025,12 +1065,17 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
                     control_model_input = latent_model_input
                     controlnet_prompt_embeds = prompt_embeds
 
+                if isinstance(controlnet_keep[i], list):
+                    cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
+                else:
+                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,
                     t,
                     encoder_hidden_states=controlnet_prompt_embeds,
                     controlnet_cond=control_image,
-                    conditioning_scale=controlnet_conditioning_scale,
+                    conditioning_scale=cond_scale,
                     guess_mode=guess_mode,
                     return_dict=False,
                 )
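Putting the img2img hunks together, `control_guidance_start` / `control_guidance_end` become ordinary `__call__` arguments. A hedged usage sketch, with placeholder checkpoint ids and input images, that restricts ControlNet guidance to the first half of the schedule:

# Sketch only: model ids and image URLs are placeholders; the new arguments are the
# control_guidance_start / control_guidance_end floats added in this release.
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
from diffusers.utils import load_image

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
).to("cuda")

init_image = load_image("https://example.com/init.png")    # placeholder URL
canny_image = load_image("https://example.com/canny.png")  # placeholder URL

image = pipe(
    "a futuristic city at dusk",
    image=init_image,
    control_image=canny_image,
    control_guidance_start=0.0,  # ControlNet active from the first step ...
    control_guidance_end=0.5,    # ... and switched off after 50% of the steps
).images[0]

The inpaint pipeline below gains the identical arguments and keep-mask logic.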
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -15,7 +15,6 @@
 # This model implementation is heavily inspired by https://github.com/haofanwang/ControlNet-for-Diffusers/
 
 import inspect
-import os
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
@@ -647,6 +646,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         prompt_embeds=None,
         negative_prompt_embeds=None,
         controlnet_conditioning_scale=1.0,
+        control_guidance_start=0.0,
+        control_guidance_end=1.0,
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -718,7 +719,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
                 raise ValueError("A single batch of multiple conditionings are supported at the moment.")
             elif len(image) != len(self.controlnet.nets):
                 raise ValueError(
-                    "For multiple controlnets: `image` must have the same length as the number of controlnets."
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
                 )
 
             for image_ in image:
@@ -752,6 +753,27 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         else:
             assert False
 
+        if len(control_guidance_start) != len(control_guidance_end):
+            raise ValueError(
+                f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
+            )
+
+        if isinstance(self.controlnet, MultiControlNetModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
+        for start, end in zip(control_guidance_start, control_guidance_end):
+            if start >= end:
+                raise ValueError(
+                    f"control guidance start: {start} cannot be larger or equal to control guidance end: {end}."
+                )
+            if start < 0.0:
+                raise ValueError(f"control guidance start: {start} can't be smaller than 0.")
+            if end > 1.0:
+                raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
+
     # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
     def check_image(self, image, prompt, prompt_embeds):
         image_is_pil = isinstance(image, PIL.Image.Image)
@@ -957,18 +979,6 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
 
         return image_latents
 
-    # override DiffusionPipeline
-    def save_pretrained(
-        self,
-        save_directory: Union[str, os.PathLike],
-        safe_serialization: bool = False,
-        variant: Optional[str] = None,
-    ):
-        if isinstance(self.controlnet, ControlNetModel):
-            super().save_pretrained(save_directory, safe_serialization, variant)
-        else:
-            raise NotImplementedError("Currently, the `save_pretrained()` is not implemented for Multi-ControlNet.")
-
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -1003,6 +1013,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.5,
         guess_mode: bool = False,
+        control_guidance_start: Union[float, List[float]] = 0.0,
+        control_guidance_end: Union[float, List[float]] = 1.0,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -1086,6 +1098,10 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
             guess_mode (`bool`, *optional*, defaults to `False`):
                 In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
                 you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
+            control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
+                The percentage of total steps at which the controlnet starts applying.
+            control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
+                The percentage of total steps at which the controlnet stops applying.
 
         Examples:
 
@@ -1096,9 +1112,22 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
+
         # 0. Default height and width to unet
         height, width = self._default_height_width(height, width, image)
 
+        # align format for control guidance
+        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
+            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
+                control_guidance_end
+            ]
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
@@ -1110,6 +1139,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
             prompt_embeds,
             negative_prompt_embeds,
             controlnet_conditioning_scale,
+            control_guidance_start,
+            control_guidance_end,
         )
 
         # 2. Define call parameters
@@ -1126,8 +1157,6 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
-        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
-
         if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
             controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
 
@@ -1244,6 +1273,15 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
+        # 7.1 Create tensor stating which controlnets to keep
+        controlnet_keep = []
+        for i in range(len(timesteps)):
+            keeps = [
+                1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+                for s, e in zip(control_guidance_start, control_guidance_end)
+            ]
+            controlnet_keep.append(keeps[0] if len(keeps) == 1 else keeps)
+
         # 8. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -1262,12 +1300,17 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
                     control_model_input = latent_model_input
                     controlnet_prompt_embeds = prompt_embeds
 
+                if isinstance(controlnet_keep[i], list):
+                    cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
+                else:
+                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,
                     t,
                     encoder_hidden_states=controlnet_prompt_embeds,
                     controlnet_cond=control_image,
-                    conditioning_scale=controlnet_conditioning_scale,
+                    conditioning_scale=cond_scale,
                     guess_mode=guess_mode,
                     return_dict=False,
                 )
diffusers/pipelines/controlnet/pipeline_flax_controlnet.py
@@ -464,7 +464,7 @@ class FlaxStableDiffusionControlNetPipeline(FlaxDiffusionPipeline):
 
             images_uint8_casted = np.asarray(images_uint8_casted).reshape(num_devices * batch_size, height, width, 3)
             images_uint8_casted, has_nsfw_concept = self._run_safety_checker(images_uint8_casted, safety_params, jit)
-            images = np.asarray(images)
+            images = np.array(images)
 
             # block images
             if any(has_nsfw_concept):
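The one-line Flax change above swaps `np.asarray` for `np.array` before the NSFW blanking step, presumably because `np.asarray` avoids copying and can hand back a read-only view, whereas `np.array` always produces a fresh writable copy that the in-place blanking can modify. A generic NumPy illustration of that difference (not diffusers-specific):

import numpy as np

src = np.zeros((2, 2))
src.setflags(write=False)   # stand-in for a non-writable device buffer

view = np.asarray(src)      # no copy: same object, inherits the read-only flag
copy = np.array(src)        # always copies: result is writable

copy[0, 0] = 1.0            # fine
# view[0, 0] = 1.0          # would raise ValueError: assignment destination is read-only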
diffusers/pipelines/kandinsky/__init__.py
@@ -15,5 +15,5 @@ else:
     from .pipeline_kandinsky import KandinskyPipeline
     from .pipeline_kandinsky_img2img import KandinskyImg2ImgPipeline
     from .pipeline_kandinsky_inpaint import KandinskyInpaintPipeline
-    from .pipeline_kandinsky_prior import KandinskyPriorPipeline
+    from .pipeline_kandinsky_prior import KandinskyPriorPipeline, KandinskyPriorPipelineOutput
     from .text_encoder import MultilingualCLIP
diffusers/pipelines/kandinsky/pipeline_kandinsky.py
@@ -22,7 +22,7 @@ from transformers import (
 from ...models import UNet2DConditionModel, VQModel
 from ...pipelines import DiffusionPipeline
 from ...pipelines.pipeline_utils import ImagePipelineOutput
-from ...schedulers import DDIMScheduler
+from ...schedulers import DDIMScheduler, DDPMScheduler
 from ...utils import (
     is_accelerate_available,
     is_accelerate_version,
@@ -88,7 +88,7 @@ class KandinskyPipeline(DiffusionPipeline):
             Frozen text-encoder.
         tokenizer ([`XLMRobertaTokenizer`]):
             Tokenizer of class
-        scheduler ([`DDIMScheduler`]):
+        scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
             A scheduler to be used in combination with `unet` to generate image latents.
         unet ([`UNet2DConditionModel`]):
             Conditional U-Net architecture to denoise the image embedding.
@@ -101,7 +101,7 @@ class KandinskyPipeline(DiffusionPipeline):
         text_encoder: MultilingualCLIP,
         tokenizer: XLMRobertaTokenizer,
         unet: UNet2DConditionModel,
-        scheduler: DDIMScheduler,
+        scheduler: Union[DDIMScheduler, DDPMScheduler],
         movq: VQModel,
     ):
         super().__init__()
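With the constructor now typed `Union[DDIMScheduler, DDPMScheduler]`, the Kandinsky text-to-image pipeline can be re-configured to sample with DDPM. A hedged sketch using the usual scheduler-swap pattern; the Hub id is assumed:

# Sketch: swap the decoder's scheduler for DDPM, which this release now accepts.
from diffusers import DDPMScheduler, KandinskyPipeline

pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1")
pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)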
@@ -115,6 +115,7 @@ class KandinskyPipeline(DiffusionPipeline):
         )
         self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -439,9 +440,6 @@ class KandinskyPipeline(DiffusionPipeline):
                 noise_pred,
                 t,
                 latents,
-                # YiYi notes: only reason this pipeline can't work with unclip scheduler is that can't pass down this argument
-                # need to use DDPM scheduler instead
-                # prev_timestep=prev_timestep,
                 generator=generator,
             ).prev_sample
         # post-processing
diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py
@@ -275,6 +275,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
         )
         self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py
@@ -274,6 +274,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
 
         return KandinskyPriorPipelineOutput(image_embeds=image_emb, negative_image_embeds=zero_image_emb)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
diffusers/pipelines/kandinsky2_2/__init__.py (new file)
@@ -0,0 +1,7 @@
+from .pipeline_kandinsky2_2 import KandinskyV22Pipeline
+from .pipeline_kandinsky2_2_controlnet import KandinskyV22ControlnetPipeline
+from .pipeline_kandinsky2_2_controlnet_img2img import KandinskyV22ControlnetImg2ImgPipeline
+from .pipeline_kandinsky2_2_img2img import KandinskyV22Img2ImgPipeline
+from .pipeline_kandinsky2_2_inpainting import KandinskyV22InpaintPipeline
+from .pipeline_kandinsky2_2_prior import KandinskyV22PriorPipeline
+from .pipeline_kandinsky2_2_prior_emb2emb import KandinskyV22PriorEmb2EmbPipeline
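The new kandinsky2_2 package is a two-stage system: a prior pipeline maps text to CLIP image embeddings, and a decoder pipeline turns those embeddings into pixels. A hedged end-to-end sketch, with assumed community Hub ids:

# Sketch of the prior -> decoder flow exposed by the new kandinsky2_2 package.
# The checkpoint ids are assumed community weights; a CUDA device is assumed.
import torch
from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline

prior = KandinskyV22PriorPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
).to("cuda")
decoder = KandinskyV22Pipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
).to("cuda")

prompt = "a portrait of a red fox, watercolor"
image_embeds, negative_image_embeds = prior(prompt, guidance_scale=1.0).to_tuple()

image = decoder(
    image_embeds=image_embeds,
    negative_image_embeds=negative_image_embeds,
    height=512,
    width=512,
    num_inference_steps=50,
).images[0]
image.save("fox.png")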