diffusers-0.23.1-py3-none-any.whl → diffusers-0.24.0-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176)
  1. diffusers/__init__.py +16 -2
  2. diffusers/configuration_utils.py +1 -0
  3. diffusers/dependency_versions_check.py +0 -1
  4. diffusers/dependency_versions_table.py +4 -5
  5. diffusers/image_processor.py +186 -14
  6. diffusers/loaders/__init__.py +82 -0
  7. diffusers/loaders/ip_adapter.py +157 -0
  8. diffusers/loaders/lora.py +1415 -0
  9. diffusers/loaders/lora_conversion_utils.py +284 -0
  10. diffusers/loaders/single_file.py +631 -0
  11. diffusers/loaders/textual_inversion.py +459 -0
  12. diffusers/loaders/unet.py +735 -0
  13. diffusers/loaders/utils.py +59 -0
  14. diffusers/models/__init__.py +12 -1
  15. diffusers/models/attention.py +165 -14
  16. diffusers/models/attention_flax.py +9 -1
  17. diffusers/models/attention_processor.py +286 -1
  18. diffusers/models/autoencoder_asym_kl.py +14 -9
  19. diffusers/models/autoencoder_kl.py +3 -18
  20. diffusers/models/autoencoder_kl_temporal_decoder.py +402 -0
  21. diffusers/models/autoencoder_tiny.py +20 -24
  22. diffusers/models/consistency_decoder_vae.py +37 -30
  23. diffusers/models/controlnet.py +59 -39
  24. diffusers/models/controlnet_flax.py +19 -18
  25. diffusers/models/embeddings_flax.py +2 -0
  26. diffusers/models/lora.py +131 -1
  27. diffusers/models/modeling_flax_utils.py +2 -1
  28. diffusers/models/modeling_outputs.py +17 -0
  29. diffusers/models/modeling_utils.py +27 -19
  30. diffusers/models/normalization.py +2 -2
  31. diffusers/models/resnet.py +390 -59
  32. diffusers/models/transformer_2d.py +20 -3
  33. diffusers/models/transformer_temporal.py +183 -1
  34. diffusers/models/unet_2d_blocks_flax.py +5 -0
  35. diffusers/models/unet_2d_condition.py +9 -0
  36. diffusers/models/unet_2d_condition_flax.py +13 -13
  37. diffusers/models/unet_3d_blocks.py +957 -173
  38. diffusers/models/unet_3d_condition.py +16 -8
  39. diffusers/models/unet_kandi3.py +589 -0
  40. diffusers/models/unet_motion_model.py +48 -33
  41. diffusers/models/unet_spatio_temporal_condition.py +489 -0
  42. diffusers/models/vae.py +63 -13
  43. diffusers/models/vae_flax.py +7 -0
  44. diffusers/models/vq_model.py +3 -1
  45. diffusers/optimization.py +16 -9
  46. diffusers/pipelines/__init__.py +65 -12
  47. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +93 -23
  48. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +97 -25
  49. diffusers/pipelines/animatediff/pipeline_animatediff.py +34 -4
  50. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -0
  51. diffusers/pipelines/auto_pipeline.py +6 -0
  52. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -0
  53. diffusers/pipelines/controlnet/pipeline_controlnet.py +217 -31
  54. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +101 -32
  55. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +136 -39
  56. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +119 -37
  57. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +196 -35
  58. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +102 -31
  59. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +1 -0
  60. diffusers/pipelines/ddim/pipeline_ddim.py +1 -0
  61. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -0
  62. diffusers/pipelines/deepfloyd_if/pipeline_if.py +13 -1
  63. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +13 -1
  64. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +13 -1
  65. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +13 -1
  66. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +13 -1
  67. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +13 -1
  68. diffusers/pipelines/dit/pipeline_dit.py +1 -0
  69. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +1 -1
  70. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +3 -3
  71. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  72. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +1 -1
  73. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +1 -1
  74. diffusers/pipelines/kandinsky3/__init__.py +49 -0
  75. diffusers/pipelines/kandinsky3/kandinsky3_pipeline.py +452 -0
  76. diffusers/pipelines/kandinsky3/kandinsky3img2img_pipeline.py +460 -0
  77. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +65 -6
  78. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +55 -3
  79. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -0
  80. diffusers/pipelines/musicldm/pipeline_musicldm.py +1 -1
  81. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +7 -2
  82. diffusers/pipelines/pipeline_flax_utils.py +4 -2
  83. diffusers/pipelines/pipeline_utils.py +33 -13
  84. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +196 -36
  85. diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py +1 -0
  86. diffusers/pipelines/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -0
  87. diffusers/pipelines/stable_diffusion/__init__.py +64 -21
  88. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +8 -3
  89. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +18 -2
  90. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +2 -2
  91. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +2 -4
  92. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -0
  93. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py +1 -0
  94. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +88 -9
  95. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +1 -0
  96. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +8 -3
  97. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +1 -0
  98. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +1 -0
  99. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py +1 -0
  100. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +1 -0
  101. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +92 -9
  102. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +92 -9
  103. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +1 -0
  104. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -13
  105. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -0
  106. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +1 -0
  107. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +1 -0
  108. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +1 -0
  109. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -0
  110. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +1 -0
  111. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +1 -0
  112. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +1 -0
  113. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +1 -0
  114. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +103 -8
  115. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +113 -8
  116. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +115 -9
  117. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -12
  118. diffusers/pipelines/stable_video_diffusion/__init__.py +58 -0
  119. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +649 -0
  120. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +108 -12
  121. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +109 -14
  122. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -0
  123. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +1 -0
  124. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +18 -3
  125. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +4 -2
  126. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +872 -0
  127. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +29 -40
  128. diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -0
  129. diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -0
  130. diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -0
  131. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +14 -4
  132. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +9 -5
  133. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +1 -1
  134. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +2 -2
  135. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +1 -1
  136. diffusers/schedulers/__init__.py +2 -4
  137. diffusers/schedulers/deprecated/__init__.py +50 -0
  138. diffusers/schedulers/{scheduling_karras_ve.py → deprecated/scheduling_karras_ve.py} +4 -4
  139. diffusers/schedulers/{scheduling_sde_vp.py → deprecated/scheduling_sde_vp.py} +4 -6
  140. diffusers/schedulers/scheduling_ddim.py +1 -3
  141. diffusers/schedulers/scheduling_ddim_inverse.py +1 -3
  142. diffusers/schedulers/scheduling_ddim_parallel.py +1 -3
  143. diffusers/schedulers/scheduling_ddpm.py +1 -3
  144. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -3
  145. diffusers/schedulers/scheduling_deis_multistep.py +15 -5
  146. diffusers/schedulers/scheduling_dpmsolver_multistep.py +15 -5
  147. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +15 -5
  148. diffusers/schedulers/scheduling_dpmsolver_sde.py +1 -3
  149. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +15 -5
  150. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +1 -3
  151. diffusers/schedulers/scheduling_euler_discrete.py +40 -13
  152. diffusers/schedulers/scheduling_heun_discrete.py +15 -5
  153. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +15 -5
  154. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +15 -5
  155. diffusers/schedulers/scheduling_lcm.py +123 -29
  156. diffusers/schedulers/scheduling_lms_discrete.py +1 -3
  157. diffusers/schedulers/scheduling_pndm.py +1 -3
  158. diffusers/schedulers/scheduling_repaint.py +1 -3
  159. diffusers/schedulers/scheduling_unipc_multistep.py +15 -5
  160. diffusers/utils/__init__.py +1 -0
  161. diffusers/utils/constants.py +8 -7
  162. diffusers/utils/dummy_pt_objects.py +45 -0
  163. diffusers/utils/dummy_torch_and_transformers_objects.py +60 -0
  164. diffusers/utils/dynamic_modules_utils.py +4 -4
  165. diffusers/utils/export_utils.py +8 -3
  166. diffusers/utils/logging.py +10 -10
  167. diffusers/utils/outputs.py +5 -5
  168. diffusers/utils/peft_utils.py +88 -44
  169. diffusers/utils/torch_utils.py +2 -2
  170. {diffusers-0.23.1.dist-info → diffusers-0.24.0.dist-info}/METADATA +38 -22
  171. {diffusers-0.23.1.dist-info → diffusers-0.24.0.dist-info}/RECORD +175 -157
  172. diffusers/loaders.py +0 -3336
  173. {diffusers-0.23.1.dist-info → diffusers-0.24.0.dist-info}/LICENSE +0 -0
  174. {diffusers-0.23.1.dist-info → diffusers-0.24.0.dist-info}/WHEEL +0 -0
  175. {diffusers-0.23.1.dist-info → diffusers-0.24.0.dist-info}/entry_points.txt +0 -0
  176. {diffusers-0.23.1.dist-info → diffusers-0.24.0.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from ...models.lora import adjust_lora_scale_text_encoder
37
37
  from ...schedulers import KarrasDiffusionSchedulers
38
38
  from ...utils import (
39
39
  USE_PEFT_BACKEND,
40
+ deprecate,
40
41
  logging,
41
42
  replace_example_docstring,
42
43
  scale_lora_layers,
@@ -132,9 +133,13 @@ EXAMPLE_DOC_STRING = """
132
133
 
133
134
 
134
135
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
135
- def retrieve_latents(encoder_output, generator):
136
- if hasattr(encoder_output, "latent_dist"):
136
+ def retrieve_latents(
137
+ encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"
138
+ ):
139
+ if hasattr(encoder_output, "latent_dist") and sample_mode == "sample":
137
140
  return encoder_output.latent_dist.sample(generator)
141
+ elif hasattr(encoder_output, "latent_dist") and sample_mode == "argmax":
142
+ return encoder_output.latent_dist.mode()
138
143
  elif hasattr(encoder_output, "latents"):
139
144
  return encoder_output.latents
140
145
  else:
@@ -192,8 +197,10 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
192
197
  watermark output images. If not defined, it will default to True if the package is installed, otherwise no
193
198
  watermarker will be used.
194
199
  """
200
+
195
201
  model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
196
202
  _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
203
+ _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
197
204
 
198
205
  def __init__(
199
206
  self,
@@ -542,6 +549,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
542
549
  controlnet_conditioning_scale=1.0,
543
550
  control_guidance_start=0.0,
544
551
  control_guidance_end=1.0,
552
+ callback_on_step_end_tensor_inputs=None,
545
553
  ):
546
554
  if strength < 0 or strength > 1:
547
555
  raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}")
@@ -552,14 +560,20 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
552
560
  f"`num_inference_steps` has to be a positive integer but is {num_inference_steps} of type"
553
561
  f" {type(num_inference_steps)}."
554
562
  )
555
- if (callback_steps is None) or (
556
- callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
557
- ):
563
+
564
+ if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
558
565
  raise ValueError(
559
566
  f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
560
567
  f" {type(callback_steps)}."
561
568
  )
562
569
 
570
+ if callback_on_step_end_tensor_inputs is not None and not all(
571
+ k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
572
+ ):
573
+ raise ValueError(
574
+ f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
575
+ )
576
+
563
577
  if prompt is not None and prompt_embeds is not None:
564
578
  raise ValueError(
565
579
  f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
@@ -950,6 +964,29 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
950
964
  """Disables the FreeU mechanism if enabled."""
951
965
  self.unet.disable_freeu()
952
966
 
967
+ @property
968
+ def guidance_scale(self):
969
+ return self._guidance_scale
970
+
971
+ @property
972
+ def clip_skip(self):
973
+ return self._clip_skip
974
+
975
+ # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
976
+ # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
977
+ # corresponds to doing no classifier free guidance.
978
+ @property
979
+ def do_classifier_free_guidance(self):
980
+ return self._guidance_scale > 1
981
+
982
+ @property
983
+ def cross_attention_kwargs(self):
984
+ return self._cross_attention_kwargs
985
+
986
+ @property
987
+ def num_timesteps(self):
988
+ return self._num_timesteps
989
+
953
990
  @torch.no_grad()
954
991
  @replace_example_docstring(EXAMPLE_DOC_STRING)
955
992
  def __call__(
@@ -975,8 +1012,6 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
975
1012
  negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
976
1013
  output_type: Optional[str] = "pil",
977
1014
  return_dict: bool = True,
978
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
979
- callback_steps: int = 1,
980
1015
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
981
1016
  controlnet_conditioning_scale: Union[float, List[float]] = 0.8,
982
1017
  guess_mode: bool = False,
@@ -991,6 +1026,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
991
1026
  aesthetic_score: float = 6.0,
992
1027
  negative_aesthetic_score: float = 2.5,
993
1028
  clip_skip: Optional[int] = None,
1029
+ callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
1030
+ callback_on_step_end_tensor_inputs: List[str] = ["latents"],
1031
+ **kwargs,
994
1032
  ):
995
1033
  r"""
996
1034
  Function invoked when calling the pipeline for generation.
@@ -1076,12 +1114,6 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1076
1114
  return_dict (`bool`, *optional*, defaults to `True`):
1077
1115
  Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
1078
1116
  plain tuple.
1079
- callback (`Callable`, *optional*):
1080
- A function that will be called every `callback_steps` steps during inference. The function will be
1081
- called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
1082
- callback_steps (`int`, *optional*, defaults to 1):
1083
- The frequency at which the `callback` function will be called. If not specified, the callback will be
1084
- called at every step.
1085
1117
  cross_attention_kwargs (`dict`, *optional*):
1086
1118
  A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
1087
1119
  `self.processor` in
@@ -1137,6 +1169,15 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1137
1169
  clip_skip (`int`, *optional*):
1138
1170
  Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
1139
1171
  the output of the pre-final layer will be used for computing the prompt embeddings.
1172
+ callback_on_step_end (`Callable`, *optional*):
1173
+ A function that is called at the end of each denoising step during inference. The function is called
1174
+ with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
1175
+ callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
1176
+ `callback_on_step_end_tensor_inputs`.
1177
+ callback_on_step_end_tensor_inputs (`List`, *optional*):
1178
+ The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
1179
+ will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
1180
+ `._callback_tensor_inputs` attribute of your pipeline class.
1140
1181
 
1141
1182
  Examples:
1142
1183
 
@@ -1145,6 +1186,23 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1145
1186
  [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`
1146
1187
  containing the output images.
1147
1188
  """
1189
+
1190
+ callback = kwargs.pop("callback", None)
1191
+ callback_steps = kwargs.pop("callback_steps", None)
1192
+
1193
+ if callback is not None:
1194
+ deprecate(
1195
+ "callback",
1196
+ "1.0.0",
1197
+ "Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
1198
+ )
1199
+ if callback_steps is not None:
1200
+ deprecate(
1201
+ "callback_steps",
1202
+ "1.0.0",
1203
+ "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
1204
+ )
1205
+
1148
1206
  controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
1149
1207
 
1150
1208
  # align format for control guidance
@@ -1154,9 +1212,10 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1154
1212
  control_guidance_end = len(control_guidance_start) * [control_guidance_end]
1155
1213
  elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
1156
1214
  mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
1157
- control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
1158
- control_guidance_end
1159
- ]
1215
+ control_guidance_start, control_guidance_end = (
1216
+ mult * [control_guidance_start],
1217
+ mult * [control_guidance_end],
1218
+ )
1160
1219
 
1161
1220
  # 1. Check inputs. Raise error if not correct
1162
1221
  self.check_inputs(
@@ -1175,8 +1234,13 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1175
1234
  controlnet_conditioning_scale,
1176
1235
  control_guidance_start,
1177
1236
  control_guidance_end,
1237
+ callback_on_step_end_tensor_inputs,
1178
1238
  )
1179
1239
 
1240
+ self._guidance_scale = guidance_scale
1241
+ self._clip_skip = clip_skip
1242
+ self._cross_attention_kwargs = cross_attention_kwargs
1243
+
1180
1244
  # 2. Define call parameters
1181
1245
  if prompt is not None and isinstance(prompt, str):
1182
1246
  batch_size = 1
@@ -1186,10 +1250,6 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1186
1250
  batch_size = prompt_embeds.shape[0]
1187
1251
 
1188
1252
  device = self._execution_device
1189
- # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
1190
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
1191
- # corresponds to doing no classifier free guidance.
1192
- do_classifier_free_guidance = guidance_scale > 1.0
1193
1253
 
1194
1254
  if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
1195
1255
  controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
@@ -1203,7 +1263,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1203
1263
 
1204
1264
  # 3. Encode input prompt
1205
1265
  text_encoder_lora_scale = (
1206
- cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
1266
+ self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
1207
1267
  )
1208
1268
  (
1209
1269
  prompt_embeds,
@@ -1215,7 +1275,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1215
1275
  prompt_2,
1216
1276
  device,
1217
1277
  num_images_per_prompt,
1218
- do_classifier_free_guidance,
1278
+ self.do_classifier_free_guidance,
1219
1279
  negative_prompt,
1220
1280
  negative_prompt_2,
1221
1281
  prompt_embeds=prompt_embeds,
@@ -1223,7 +1283,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1223
1283
  pooled_prompt_embeds=pooled_prompt_embeds,
1224
1284
  negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
1225
1285
  lora_scale=text_encoder_lora_scale,
1226
- clip_skip=clip_skip,
1286
+ clip_skip=self.clip_skip,
1227
1287
  )
1228
1288
 
1229
1289
  # 4. Prepare image and controlnet_conditioning_image
@@ -1238,7 +1298,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1238
1298
  num_images_per_prompt=num_images_per_prompt,
1239
1299
  device=device,
1240
1300
  dtype=controlnet.dtype,
1241
- do_classifier_free_guidance=do_classifier_free_guidance,
1301
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
1242
1302
  guess_mode=guess_mode,
1243
1303
  )
1244
1304
  height, width = control_image.shape[-2:]
@@ -1254,7 +1314,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1254
1314
  num_images_per_prompt=num_images_per_prompt,
1255
1315
  device=device,
1256
1316
  dtype=controlnet.dtype,
1257
- do_classifier_free_guidance=do_classifier_free_guidance,
1317
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
1258
1318
  guess_mode=guess_mode,
1259
1319
  )
1260
1320
 
@@ -1269,6 +1329,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1269
1329
  self.scheduler.set_timesteps(num_inference_steps, device=device)
1270
1330
  timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
1271
1331
  latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
1332
+ self._num_timesteps = len(timesteps)
1272
1333
 
1273
1334
  # 6. Prepare latent variables
1274
1335
  latents = self.prepare_latents(
@@ -1326,7 +1387,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1326
1387
  )
1327
1388
  add_time_ids = add_time_ids.repeat(batch_size * num_images_per_prompt, 1)
1328
1389
 
1329
- if do_classifier_free_guidance:
1390
+ if self.do_classifier_free_guidance:
1330
1391
  prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
1331
1392
  add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
1332
1393
  add_neg_time_ids = add_neg_time_ids.repeat(batch_size * num_images_per_prompt, 1)
@@ -1341,13 +1402,13 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1341
1402
  with self.progress_bar(total=num_inference_steps) as progress_bar:
1342
1403
  for i, t in enumerate(timesteps):
1343
1404
  # expand the latents if we are doing classifier free guidance
1344
- latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
1405
+ latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
1345
1406
  latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
1346
1407
 
1347
1408
  added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
1348
1409
 
1349
1410
  # controlnet(s) inference
1350
- if guess_mode and do_classifier_free_guidance:
1411
+ if guess_mode and self.do_classifier_free_guidance:
1351
1412
  # Infer ControlNet only for the conditional batch.
1352
1413
  control_model_input = latents
1353
1414
  control_model_input = self.scheduler.scale_model_input(control_model_input, t)
@@ -1380,7 +1441,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1380
1441
  return_dict=False,
1381
1442
  )
1382
1443
 
1383
- if guess_mode and do_classifier_free_guidance:
1444
+ if guess_mode and self.do_classifier_free_guidance:
1384
1445
  # Inferred ControlNet only for the conditional batch.
1385
1446
  # To apply the output of ControlNet to both the unconditional and conditional batches,
1386
1447
  # add 0 to the unconditional batch to keep it unchanged.
@@ -1392,7 +1453,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1392
1453
  latent_model_input,
1393
1454
  t,
1394
1455
  encoder_hidden_states=prompt_embeds,
1395
- cross_attention_kwargs=cross_attention_kwargs,
1456
+ cross_attention_kwargs=self.cross_attention_kwargs,
1396
1457
  down_block_additional_residuals=down_block_res_samples,
1397
1458
  mid_block_additional_residual=mid_block_res_sample,
1398
1459
  added_cond_kwargs=added_cond_kwargs,
@@ -1400,13 +1461,23 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1400
1461
  )[0]
1401
1462
 
1402
1463
  # perform guidance
1403
- if do_classifier_free_guidance:
1464
+ if self.do_classifier_free_guidance:
1404
1465
  noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
1405
1466
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
1406
1467
 
1407
1468
  # compute the previous noisy sample x_t -> x_t-1
1408
1469
  latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
1409
1470
 
1471
+ if callback_on_step_end is not None:
1472
+ callback_kwargs = {}
1473
+ for k in callback_on_step_end_tensor_inputs:
1474
+ callback_kwargs[k] = locals()[k]
1475
+ callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
1476
+
1477
+ latents = callback_outputs.pop("latents", latents)
1478
+ prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
1479
+ negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
1480
+
1410
1481
  # call the callback, if provided
1411
1482
  if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
1412
1483
  progress_bar.update()
@@ -39,6 +39,7 @@ class DanceDiffusionPipeline(DiffusionPipeline):
39
39
  A scheduler to be used in combination with `unet` to denoise the encoded audio latents. Can be one of
40
40
  [`IPNDMScheduler`].
41
41
  """
42
+
42
43
  model_cpu_offload_seq = "unet"
43
44
 
44
45
  def __init__(self, unet, scheduler):
@@ -35,6 +35,7 @@ class DDIMPipeline(DiffusionPipeline):
35
35
  A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of
36
36
  [`DDPMScheduler`], or [`DDIMScheduler`].
37
37
  """
38
+
38
39
  model_cpu_offload_seq = "unet"
39
40
 
40
41
  def __init__(self, unet, scheduler):
@@ -35,6 +35,7 @@ class DDPMPipeline(DiffusionPipeline):
35
35
  A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of
36
36
  [`DDPMScheduler`], or [`DDIMScheduler`].
37
37
  """
38
+
38
39
  model_cpu_offload_seq = "unet"
39
40
 
40
41
  def __init__(self, unet, scheduler):
@@ -98,7 +98,19 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
98
98
  watermarker: Optional[IFWatermarker]
99
99
 
100
100
  bad_punct_regex = re.compile(
101
- r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
101
+ r"["
102
+ + "#®•©™&@·º½¾¿¡§~"
103
+ + r"\)"
104
+ + r"\("
105
+ + r"\]"
106
+ + r"\["
107
+ + r"\}"
108
+ + r"\{"
109
+ + r"\|"
110
+ + "\\"
111
+ + r"\/"
112
+ + r"\*"
113
+ + r"]{1,}"
102
114
  ) # noqa
103
115
 
104
116
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
@@ -122,7 +122,19 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
122
122
  watermarker: Optional[IFWatermarker]
123
123
 
124
124
  bad_punct_regex = re.compile(
125
- r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
125
+ r"["
126
+ + "#®•©™&@·º½¾¿¡§~"
127
+ + r"\)"
128
+ + r"\("
129
+ + r"\]"
130
+ + r"\["
131
+ + r"\}"
132
+ + r"\{"
133
+ + r"\|"
134
+ + "\\"
135
+ + r"\/"
136
+ + r"\*"
137
+ + r"]{1,}"
126
138
  ) # noqa
127
139
 
128
140
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
@@ -126,7 +126,19 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
126
126
  watermarker: Optional[IFWatermarker]
127
127
 
128
128
  bad_punct_regex = re.compile(
129
- r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
129
+ r"["
130
+ + "#®•©™&@·º½¾¿¡§~"
131
+ + r"\)"
132
+ + r"\("
133
+ + r"\]"
134
+ + r"\["
135
+ + r"\}"
136
+ + r"\{"
137
+ + r"\|"
138
+ + "\\"
139
+ + r"\/"
140
+ + r"\*"
141
+ + r"]{1,}"
130
142
  ) # noqa
131
143
 
132
144
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor"]
@@ -125,7 +125,19 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
125
125
  watermarker: Optional[IFWatermarker]
126
126
 
127
127
  bad_punct_regex = re.compile(
128
- r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
128
+ r"["
129
+ + "#®•©™&@·º½¾¿¡§~"
130
+ + r"\)"
131
+ + r"\("
132
+ + r"\]"
133
+ + r"\["
134
+ + r"\}"
135
+ + r"\{"
136
+ + r"\|"
137
+ + "\\"
138
+ + r"\/"
139
+ + r"\*"
140
+ + r"]{1,}"
129
141
  ) # noqa
130
142
 
131
143
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
@@ -128,7 +128,19 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
128
128
  watermarker: Optional[IFWatermarker]
129
129
 
130
130
  bad_punct_regex = re.compile(
131
- r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
131
+ r"["
132
+ + "#®•©™&@·º½¾¿¡§~"
133
+ + r"\)"
134
+ + r"\("
135
+ + r"\]"
136
+ + r"\["
137
+ + r"\}"
138
+ + r"\{"
139
+ + r"\|"
140
+ + "\\"
141
+ + r"\/"
142
+ + r"\*"
143
+ + r"]{1,}"
132
144
  ) # noqa
133
145
 
134
146
  model_cpu_offload_seq = "text_encoder->unet"
@@ -84,7 +84,19 @@ class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
84
84
  watermarker: Optional[IFWatermarker]
85
85
 
86
86
  bad_punct_regex = re.compile(
87
- r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
87
+ r"["
88
+ + "#®•©™&@·º½¾¿¡§~"
89
+ + r"\)"
90
+ + r"\("
91
+ + r"\]"
92
+ + r"\["
93
+ + r"\}"
94
+ + r"\{"
95
+ + r"\|"
96
+ + "\\"
97
+ + r"\/"
98
+ + r"\*"
99
+ + r"]{1,}"
88
100
  ) # noqa
89
101
 
90
102
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
@@ -43,6 +43,7 @@ class DiTPipeline(DiffusionPipeline):
43
43
  scheduler ([`DDIMScheduler`]):
44
44
  A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
45
45
  """
46
+
46
47
  model_cpu_offload_seq = "transformer->vae"
47
48
 
48
49
  def __init__(
@@ -181,7 +181,7 @@ class KandinskyV22Pipeline(DiffusionPipeline):
181
181
  callback_on_step_end_tensor_inputs (`List`, *optional*):
182
182
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
183
183
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
184
- `._callback_tensor_inputs` attribute of your pipeine class.
184
+ `._callback_tensor_inputs` attribute of your pipeline class.
185
185
 
186
186
  Examples:
187
187
 
@@ -283,7 +283,7 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
283
283
  callback_on_step_end_tensor_inputs (`List`, *optional*):
284
284
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
285
285
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
286
- `._callback_tensor_inputs` attribute of your pipeine class.
286
+ `._callback_tensor_inputs` attribute of your pipeline class.
287
287
 
288
288
  Examples:
289
289
 
@@ -759,7 +759,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
759
759
  prior_callback_on_step_end_tensor_inputs (`List`, *optional*):
760
760
  The list of tensor inputs for the `prior_callback_on_step_end` function. The tensors specified in the
761
761
  list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in
762
- the `._callback_tensor_inputs` attribute of your pipeine class.
762
+ the `._callback_tensor_inputs` attribute of your pipeline class.
763
763
  callback_on_step_end (`Callable`, *optional*):
764
764
  A function that calls at the end of each denoising steps during the inference. The function is called
765
765
  with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
@@ -768,7 +768,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
768
768
  callback_on_step_end_tensor_inputs (`List`, *optional*):
769
769
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
770
770
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
771
- `._callback_tensor_inputs` attribute of your pipeine class.
771
+ `._callback_tensor_inputs` attribute of your pipeline class.
772
772
 
773
773
 
774
774
  Examples:
@@ -255,7 +255,7 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
255
255
  callback_on_step_end_tensor_inputs (`List`, *optional*):
256
256
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
257
257
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
258
- `._callback_tensor_inputs` attribute of your pipeine class.
258
+ `._callback_tensor_inputs` attribute of your pipeline class.
259
259
 
260
260
  Examples:
261
261
 
@@ -362,7 +362,7 @@ class KandinskyV22InpaintPipeline(DiffusionPipeline):
362
362
  callback_on_step_end_tensor_inputs (`List`, *optional*):
363
363
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
364
364
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
365
- `._callback_tensor_inputs` attribute of your pipeine class.
365
+ `._callback_tensor_inputs` attribute of your pipeline class.
366
366
 
367
367
  Examples:
368
368
 
@@ -423,7 +423,7 @@ class KandinskyV22PriorPipeline(DiffusionPipeline):
423
423
  callback_on_step_end_tensor_inputs (`List`, *optional*):
424
424
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
425
425
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
426
- `._callback_tensor_inputs` attribute of your pipeine class.
426
+ `._callback_tensor_inputs` attribute of your pipeline class.
427
427
 
428
428
  Examples:
429
429
 
@@ -0,0 +1,49 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ...utils import (
4
+ DIFFUSERS_SLOW_IMPORT,
5
+ OptionalDependencyNotAvailable,
6
+ _LazyModule,
7
+ get_objects_from_module,
8
+ is_torch_available,
9
+ is_transformers_available,
10
+ )
11
+
12
+
13
+ _dummy_objects = {}
14
+ _import_structure = {}
15
+
16
+ try:
17
+ if not (is_transformers_available() and is_torch_available()):
18
+ raise OptionalDependencyNotAvailable()
19
+ except OptionalDependencyNotAvailable:
20
+ from ...utils import dummy_torch_and_transformers_objects # noqa F403
21
+
22
+ _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
23
+ else:
24
+ _import_structure["kandinsky3_pipeline"] = ["Kandinsky3Pipeline"]
25
+ _import_structure["kandinsky3img2img_pipeline"] = ["Kandinsky3Img2ImgPipeline"]
26
+
27
+
28
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
29
+ try:
30
+ if not (is_transformers_available() and is_torch_available()):
31
+ raise OptionalDependencyNotAvailable()
32
+
33
+ except OptionalDependencyNotAvailable:
34
+ from ...utils.dummy_torch_and_transformers_objects import *
35
+ else:
36
+ from .kandinsky3_pipeline import Kandinsky3Pipeline
37
+ from .kandinsky3img2img_pipeline import Kandinsky3Img2ImgPipeline
38
+ else:
39
+ import sys
40
+
41
+ sys.modules[__name__] = _LazyModule(
42
+ __name__,
43
+ globals()["__file__"],
44
+ _import_structure,
45
+ module_spec=__spec__,
46
+ )
47
+
48
+ for name, value in _dummy_objects.items():
49
+ setattr(sys.modules[__name__], name, value)