diffusers 0.34.0__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. diffusers/__init__.py +98 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/custom_blocks.py +134 -0
  4. diffusers/commands/diffusers_cli.py +2 -0
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/configuration_utils.py +11 -2
  7. diffusers/dependency_versions_table.py +3 -3
  8. diffusers/guiders/__init__.py +41 -0
  9. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  10. diffusers/guiders/auto_guidance.py +190 -0
  11. diffusers/guiders/classifier_free_guidance.py +141 -0
  12. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  13. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  14. diffusers/guiders/guider_utils.py +309 -0
  15. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  16. diffusers/guiders/skip_layer_guidance.py +262 -0
  17. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  18. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  19. diffusers/hooks/__init__.py +17 -0
  20. diffusers/hooks/_common.py +56 -0
  21. diffusers/hooks/_helpers.py +293 -0
  22. diffusers/hooks/faster_cache.py +7 -6
  23. diffusers/hooks/first_block_cache.py +259 -0
  24. diffusers/hooks/group_offloading.py +292 -286
  25. diffusers/hooks/hooks.py +56 -1
  26. diffusers/hooks/layer_skip.py +263 -0
  27. diffusers/hooks/layerwise_casting.py +2 -7
  28. diffusers/hooks/pyramid_attention_broadcast.py +14 -11
  29. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  30. diffusers/hooks/utils.py +43 -0
  31. diffusers/loaders/__init__.py +6 -0
  32. diffusers/loaders/ip_adapter.py +255 -4
  33. diffusers/loaders/lora_base.py +63 -30
  34. diffusers/loaders/lora_conversion_utils.py +434 -53
  35. diffusers/loaders/lora_pipeline.py +834 -37
  36. diffusers/loaders/peft.py +28 -5
  37. diffusers/loaders/single_file_model.py +44 -11
  38. diffusers/loaders/single_file_utils.py +170 -2
  39. diffusers/loaders/transformer_flux.py +9 -10
  40. diffusers/loaders/transformer_sd3.py +6 -1
  41. diffusers/loaders/unet.py +22 -5
  42. diffusers/loaders/unet_loader_utils.py +5 -2
  43. diffusers/models/__init__.py +8 -0
  44. diffusers/models/attention.py +484 -3
  45. diffusers/models/attention_dispatch.py +1218 -0
  46. diffusers/models/attention_processor.py +105 -663
  47. diffusers/models/auto_model.py +2 -2
  48. diffusers/models/autoencoders/__init__.py +1 -0
  49. diffusers/models/autoencoders/autoencoder_dc.py +14 -1
  50. diffusers/models/autoencoders/autoencoder_kl.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
  52. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  53. diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
  54. diffusers/models/cache_utils.py +31 -9
  55. diffusers/models/controlnets/controlnet_flux.py +5 -5
  56. diffusers/models/controlnets/controlnet_union.py +4 -4
  57. diffusers/models/embeddings.py +26 -34
  58. diffusers/models/model_loading_utils.py +233 -1
  59. diffusers/models/modeling_flax_utils.py +1 -2
  60. diffusers/models/modeling_utils.py +159 -94
  61. diffusers/models/transformers/__init__.py +2 -0
  62. diffusers/models/transformers/transformer_chroma.py +16 -117
  63. diffusers/models/transformers/transformer_cogview4.py +36 -2
  64. diffusers/models/transformers/transformer_cosmos.py +11 -4
  65. diffusers/models/transformers/transformer_flux.py +372 -132
  66. diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
  67. diffusers/models/transformers/transformer_ltx.py +104 -23
  68. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  69. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  70. diffusers/models/transformers/transformer_wan.py +298 -85
  71. diffusers/models/transformers/transformer_wan_vace.py +15 -21
  72. diffusers/models/unets/unet_2d_condition.py +2 -1
  73. diffusers/modular_pipelines/__init__.py +83 -0
  74. diffusers/modular_pipelines/components_manager.py +1068 -0
  75. diffusers/modular_pipelines/flux/__init__.py +66 -0
  76. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  77. diffusers/modular_pipelines/flux/decoders.py +109 -0
  78. diffusers/modular_pipelines/flux/denoise.py +227 -0
  79. diffusers/modular_pipelines/flux/encoders.py +412 -0
  80. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  81. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  82. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  83. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  84. diffusers/modular_pipelines/node_utils.py +665 -0
  85. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  86. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  87. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  88. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  89. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  90. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  91. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  92. diffusers/modular_pipelines/wan/__init__.py +66 -0
  93. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  94. diffusers/modular_pipelines/wan/decoders.py +105 -0
  95. diffusers/modular_pipelines/wan/denoise.py +261 -0
  96. diffusers/modular_pipelines/wan/encoders.py +242 -0
  97. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  98. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  99. diffusers/pipelines/__init__.py +31 -0
  100. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
  101. diffusers/pipelines/auto_pipeline.py +17 -13
  102. diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
  103. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
  104. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
  105. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
  106. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
  107. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
  108. diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
  109. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
  110. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
  111. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
  112. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
  113. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
  114. diffusers/pipelines/dit/pipeline_dit.py +3 -1
  115. diffusers/pipelines/flux/__init__.py +4 -0
  116. diffusers/pipelines/flux/pipeline_flux.py +34 -26
  117. diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
  118. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
  119. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
  120. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
  121. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
  122. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
  123. diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
  124. diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
  125. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
  126. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  127. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  128. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  129. diffusers/pipelines/flux/pipeline_output.py +6 -4
  130. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
  131. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
  132. diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
  133. diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
  134. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
  135. diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
  136. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  137. diffusers/pipelines/pipeline_loading_utils.py +24 -2
  138. diffusers/pipelines/pipeline_utils.py +22 -15
  139. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
  140. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
  141. diffusers/pipelines/qwenimage/__init__.py +55 -0
  142. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  143. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  144. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +849 -0
  145. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  146. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  147. diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
  148. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  149. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  150. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  151. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  152. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  153. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  154. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  155. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
  156. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
  157. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
  158. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
  159. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
  160. diffusers/pipelines/wan/pipeline_wan.py +78 -20
  161. diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
  162. diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
  163. diffusers/quantizers/__init__.py +1 -177
  164. diffusers/quantizers/base.py +11 -0
  165. diffusers/quantizers/gguf/utils.py +92 -3
  166. diffusers/quantizers/pipe_quant_config.py +202 -0
  167. diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
  168. diffusers/schedulers/scheduling_deis_multistep.py +8 -1
  169. diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
  170. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
  171. diffusers/schedulers/scheduling_scm.py +0 -1
  172. diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
  173. diffusers/schedulers/scheduling_utils.py +2 -2
  174. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  175. diffusers/training_utils.py +78 -0
  176. diffusers/utils/__init__.py +10 -0
  177. diffusers/utils/constants.py +4 -0
  178. diffusers/utils/dummy_pt_objects.py +312 -0
  179. diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
  180. diffusers/utils/dynamic_modules_utils.py +84 -25
  181. diffusers/utils/hub_utils.py +33 -17
  182. diffusers/utils/import_utils.py +70 -0
  183. diffusers/utils/peft_utils.py +11 -8
  184. diffusers/utils/testing_utils.py +136 -10
  185. diffusers/utils/torch_utils.py +18 -0
  186. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/METADATA +6 -6
  187. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/RECORD +191 -127
  188. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/LICENSE +0 -0
  189. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/WHEEL +0 -0
  190. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/entry_points.txt +0 -0
  191. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/top_level.txt +0 -0
@@ -19,7 +19,6 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import numpy as np
 import PIL.Image
 import torch
-import torch.nn.functional as F
 from transformers import (
     CLIPImageProcessor,
     CLIPTextModel,
@@ -38,7 +37,13 @@ from ...loaders import (
     StableDiffusionXLLoraLoaderMixin,
     TextualInversionLoaderMixin,
 )
-from ...models import AutoencoderKL, ControlNetModel, ControlNetUnionModel, ImageProjection, UNet2DConditionModel
+from ...models import (
+    AutoencoderKL,
+    ControlNetUnionModel,
+    ImageProjection,
+    MultiControlNetUnionModel,
+    UNet2DConditionModel,
+)
 from ...models.attention_processor import (
     AttnProcessor2_0,
     XFormersAttnProcessor,
@@ -262,7 +267,9 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         tokenizer: CLIPTokenizer,
         tokenizer_2: CLIPTokenizer,
         unet: UNet2DConditionModel,
-        controlnet: ControlNetUnionModel,
+        controlnet: Union[
+            ControlNetUnionModel, List[ControlNetUnionModel], Tuple[ControlNetUnionModel], MultiControlNetUnionModel
+        ],
         scheduler: KarrasDiffusionSchedulers,
         requires_aesthetics_score: bool = False,
         force_zeros_for_empty_prompt: bool = True,
@@ -272,8 +279,8 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
     ):
         super().__init__()

-        if not isinstance(controlnet, ControlNetUnionModel):
-            raise ValueError("Expected `controlnet` to be of type `ControlNetUnionModel`.")
+        if isinstance(controlnet, (list, tuple)):
+            controlnet = MultiControlNetUnionModel(controlnet)

         self.register_modules(
             vae=vae,
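
With this change the pipeline's `__init__` accepts either a single `ControlNetUnionModel` or a list/tuple of them, which it wraps in a `MultiControlNetUnionModel`. A minimal sketch of that constructor path; the checkpoint ids are placeholders and are not part of this diff:

    import torch
    from diffusers import ControlNetUnionModel, StableDiffusionXLControlNetUnionImg2ImgPipeline

    # Placeholder repo ids; any SDXL base checkpoint and ControlNet-Union checkpoints would do.
    controlnet_a = ControlNetUnionModel.from_pretrained("xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16)
    controlnet_b = ControlNetUnionModel.from_pretrained("xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16)

    # Passing a list is now valid: __init__ wraps it in MultiControlNetUnionModel (see the hunk above).
    pipe = StableDiffusionXLControlNetUnionImg2ImgPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        controlnet=[controlnet_a, controlnet_b],
        torch_dtype=torch.float16,
    ).to("cuda")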
@@ -649,6 +656,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         controlnet_conditioning_scale=1.0,
         control_guidance_start=0.0,
         control_guidance_end=1.0,
+        control_mode=None,
         callback_on_step_end_tensor_inputs=None,
     ):
         if strength < 0 or strength > 1:
@@ -722,28 +730,44 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
                 "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
             )

+        # `prompt` needs more sophisticated handling when there are multiple
+        # conditionings.
+        if isinstance(self.controlnet, MultiControlNetUnionModel):
+            if isinstance(prompt, list):
+                logger.warning(
+                    f"You have {len(self.controlnet.nets)} ControlNets and you have passed {len(prompt)}"
+                    " prompts. The conditionings will be fixed across the prompts."
+                )
+
         # Check `image`
-        is_compiled = hasattr(F, "scaled_dot_product_attention") and isinstance(
-            self.controlnet, torch._dynamo.eval_frame.OptimizedModule
-        )
-        if (
-            isinstance(self.controlnet, ControlNetModel)
-            or is_compiled
-            and isinstance(self.controlnet._orig_mod, ControlNetModel)
-        ):
-            self.check_image(image, prompt, prompt_embeds)
-        elif (
-            isinstance(self.controlnet, ControlNetUnionModel)
-            or is_compiled
-            and isinstance(self.controlnet._orig_mod, ControlNetUnionModel)
-        ):
-            self.check_image(image, prompt, prompt_embeds)
-        else:
-            assert False
+        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
+
+        if isinstance(controlnet, ControlNetUnionModel):
+            for image_ in image:
+                self.check_image(image_, prompt, prompt_embeds)
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            if not isinstance(image, list):
+                raise TypeError("For multiple controlnets: `image` must be type `list`")
+            elif not all(isinstance(i, list) for i in image):
+                raise ValueError("For multiple controlnets: elements of `image` must be list of conditionings.")
+            elif len(image) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
+                )
+
+            for images_ in image:
+                for image_ in images_:
+                    self.check_image(image_, prompt, prompt_embeds)

         if not isinstance(control_guidance_start, (tuple, list)):
             control_guidance_start = [control_guidance_start]

+        if isinstance(controlnet, MultiControlNetUnionModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
         if not isinstance(control_guidance_end, (tuple, list)):
             control_guidance_end = [control_guidance_end]

@@ -762,6 +786,15 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
             if end > 1.0:
                 raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")

+        # Check `control_mode`
+        if isinstance(controlnet, ControlNetUnionModel):
+            if max(control_mode) >= controlnet.config.num_control_type:
+                raise ValueError(f"control_mode: must be lower than {controlnet.config.num_control_type}.")
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            for _control_mode, _controlnet in zip(control_mode, self.controlnet.nets):
+                if max(_control_mode) >= _controlnet.config.num_control_type:
+                    raise ValueError(f"control_mode: must be lower than {_controlnet.config.num_control_type}.")
+
         if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
             raise ValueError(
                 "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
@@ -1049,7 +1082,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         prompt: Union[str, List[str]] = None,
         prompt_2: Optional[Union[str, List[str]]] = None,
         image: PipelineImageInput = None,
-        control_image: PipelineImageInput = None,
+        control_image: Union[PipelineImageInput, List[PipelineImageInput]] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
         strength: float = 0.8,
@@ -1074,7 +1107,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         guess_mode: bool = False,
         control_guidance_start: Union[float, List[float]] = 0.0,
         control_guidance_end: Union[float, List[float]] = 1.0,
-        control_mode: Optional[Union[int, List[int]]] = None,
+        control_mode: Optional[Union[int, List[int], List[List[int]]]] = None,
         original_size: Tuple[int, int] = None,
         crops_coords_top_left: Tuple[int, int] = (0, 0),
         target_size: Tuple[int, int] = None,
@@ -1104,13 +1137,13 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
                 `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The initial image will be used as the starting point for the image generation process. Can also accept
                 image latents as `image`, if passing latents directly, it will not be encoded again.
-            control_image (`PipelineImageInput`):
-                The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
-                the type is specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also
-                be accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
-                and/or width are passed, `image` is resized according to them. If multiple ControlNets are specified in
-                init, images must be passed as a list such that each element of the list can be correctly batched for
-                input to a single controlnet.
+            control_image (`PipelineImageInput` or `List[PipelineImageInput]`, *optional*):
+                The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
+                specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
+                as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
+                width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
+                images must be passed as a list such that each element of the list can be correctly batched for input
+                to a single ControlNet.
             height (`int`, *optional*, defaults to the size of control_image):
                 The height in pixels of the generated image. Anything below 512 pixels won't work well for
                 [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
@@ -1184,16 +1217,21 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
                 `self.processor` in
                 [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
-                The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
-                to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
-                corresponding scale as a list.
+                The outputs of the ControlNet are multiplied by `controlnet_conditioning_scale` before they are added
+                to the residual in the original `unet`. If multiple ControlNets are specified in `init`, you can set
+                the corresponding scale as a list.
             guess_mode (`bool`, *optional*, defaults to `False`):
                 In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
                 you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
             control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
-                The percentage of total steps at which the controlnet starts applying.
+                The percentage of total steps at which the ControlNet starts applying.
             control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
-                The percentage of total steps at which the controlnet stops applying.
+                The percentage of total steps at which the ControlNet stops applying.
+            control_mode (`int` or `List[int]` or `List[List[int]], *optional*):
+                The control condition types for the ControlNet. See the ControlNet's model card forinformation on the
+                available control modes. If multiple ControlNets are specified in `init`, control_mode should be a list
+                where each ControlNet should have its corresponding control mode list. Should reflect the order of
+                conditions in control_image
             original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                 If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
                 `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
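
Per the updated docstrings, when several ControlNet-Union models are loaded, `control_image` becomes a list with one sub-list of conditioning images per ControlNet, and `control_mode` becomes a list of per-ControlNet mode lists in the same order. A hedged sketch continuing the constructor example above; the images, mode indices, and scales are illustrative, not taken from this diff:

    # `init_image`, `depth_image` and `pose_image` are user-prepared PIL images (illustrative).
    result = pipe(
        prompt="a futuristic city at dusk",
        image=init_image,                              # img2img starting image
        control_image=[[depth_image], [pose_image]],   # one list of conditions per ControlNet
        control_mode=[[1], [0]],                       # per-ControlNet control types; see each model card
        controlnet_conditioning_scale=[0.6, 0.8],
        strength=0.7,
    ).images[0]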
@@ -1273,12 +1311,6 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(

         controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet

-        # align format for control guidance
-        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
-            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
-        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
-            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
-
         if not isinstance(control_image, list):
             control_image = [control_image]
         else:
@@ -1287,37 +1319,56 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         if not isinstance(control_mode, list):
             control_mode = [control_mode]

-        if len(control_image) != len(control_mode):
-            raise ValueError("Expected len(control_image) == len(control_type)")
+        if isinstance(controlnet, MultiControlNetUnionModel):
+            control_image = [[item] for item in control_image]
+            control_mode = [[item] for item in control_mode]

-        num_control_type = controlnet.config.num_control_type
+        # align format for control guidance
+        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetUnionModel) else len(control_mode)
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )
+
+        if isinstance(controlnet_conditioning_scale, float):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetUnionModel) else len(control_mode)
+            controlnet_conditioning_scale = [controlnet_conditioning_scale] * mult

         # 1. Check inputs
-        control_type = [0 for _ in range(num_control_type)]
-        for _image, control_idx in zip(control_image, control_mode):
-            control_type[control_idx] = 1
-        self.check_inputs(
-            prompt,
-            prompt_2,
-            _image,
-            strength,
-            num_inference_steps,
-            callback_steps,
-            negative_prompt,
-            negative_prompt_2,
-            prompt_embeds,
-            negative_prompt_embeds,
-            pooled_prompt_embeds,
-            negative_pooled_prompt_embeds,
-            ip_adapter_image,
-            ip_adapter_image_embeds,
-            controlnet_conditioning_scale,
-            control_guidance_start,
-            control_guidance_end,
-            callback_on_step_end_tensor_inputs,
-        )
+        self.check_inputs(
+            prompt,
+            prompt_2,
+            control_image,
+            strength,
+            num_inference_steps,
+            callback_steps,
+            negative_prompt,
+            negative_prompt_2,
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
+            ip_adapter_image,
+            ip_adapter_image_embeds,
+            controlnet_conditioning_scale,
+            control_guidance_start,
+            control_guidance_end,
+            control_mode,
+            callback_on_step_end_tensor_inputs,
+        )

-        control_type = torch.Tensor(control_type)
+        if isinstance(controlnet, ControlNetUnionModel):
+            control_type = torch.zeros(controlnet.config.num_control_type).scatter_(0, torch.tensor(control_mode), 1)
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            control_type = [
+                torch.zeros(controlnet_.config.num_control_type).scatter_(0, torch.tensor(control_mode_), 1)
+                for control_mode_, controlnet_ in zip(control_mode, self.controlnet.nets)
+            ]

         self._guidance_scale = guidance_scale
         self._clip_skip = clip_skip
@@ -1334,7 +1385,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(

         device = self._execution_device

-        global_pool_conditions = controlnet.config.global_pool_conditions
+        global_pool_conditions = (
+            controlnet.config.global_pool_conditions
+            if isinstance(controlnet, ControlNetUnionModel)
+            else controlnet.nets[0].config.global_pool_conditions
+        )
         guess_mode = guess_mode or global_pool_conditions

         # 3.1. Encode input prompt
@@ -1372,22 +1427,55 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
             self.do_classifier_free_guidance,
         )

-        # 4. Prepare image and controlnet_conditioning_image
+        # 4.1 Prepare image
         image = self.image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)

-        for idx, _ in enumerate(control_image):
-            control_image[idx] = self.prepare_control_image(
-                image=control_image[idx],
-                width=width,
-                height=height,
-                batch_size=batch_size * num_images_per_prompt,
-                num_images_per_prompt=num_images_per_prompt,
-                device=device,
-                dtype=controlnet.dtype,
-                do_classifier_free_guidance=self.do_classifier_free_guidance,
-                guess_mode=guess_mode,
-            )
-            height, width = control_image[idx].shape[-2:]
+        # 4.2 Prepare control images
+        if isinstance(controlnet, ControlNetUnionModel):
+            control_images = []
+
+            for image_ in control_image:
+                image_ = self.prepare_control_image(
+                    image=image_,
+                    width=width,
+                    height=height,
+                    batch_size=batch_size * num_images_per_prompt,
+                    num_images_per_prompt=num_images_per_prompt,
+                    device=device,
+                    dtype=controlnet.dtype,
+                    do_classifier_free_guidance=self.do_classifier_free_guidance,
+                    guess_mode=guess_mode,
+                )
+
+                control_images.append(image_)
+
+            control_image = control_images
+            height, width = control_image[0].shape[-2:]
+
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            control_images = []
+
+            for control_image_ in control_image:
+                images = []
+
+                for image_ in control_image_:
+                    image_ = self.prepare_control_image(
+                        image=image_,
+                        width=width,
+                        height=height,
+                        batch_size=batch_size * num_images_per_prompt,
+                        num_images_per_prompt=num_images_per_prompt,
+                        device=device,
+                        dtype=controlnet.dtype,
+                        do_classifier_free_guidance=self.do_classifier_free_guidance,
+                        guess_mode=guess_mode,
+                    )
+
+                    images.append(image_)
+                control_images.append(images)
+
+            control_image = control_images
+            height, width = control_image[0][0].shape[-2:]

         # 5. Prepare timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
@@ -1414,10 +1502,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         # 7.1 Create tensor stating which controlnets to keep
         controlnet_keep = []
         for i in range(len(timesteps)):
-            controlnet_keep.append(
-                1.0
-                - float(i / len(timesteps) < control_guidance_start or (i + 1) / len(timesteps) > control_guidance_end)
-            )
+            keeps = [
+                1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+                for s, e in zip(control_guidance_start, control_guidance_end)
+            ]
+            controlnet_keep.append(keeps)

         # 7.2 Prepare added time ids & embeddings
         original_size = original_size or (height, width)
@@ -1460,12 +1549,25 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         prompt_embeds = prompt_embeds.to(device)
         add_text_embeds = add_text_embeds.to(device)
         add_time_ids = add_time_ids.to(device)
-        control_type = (
-            control_type.reshape(1, -1)
-            .to(device, dtype=prompt_embeds.dtype)
-            .repeat(batch_size * num_images_per_prompt * 2, 1)
+
+        control_type_repeat_factor = (
+            batch_size * num_images_per_prompt * (2 if self.do_classifier_free_guidance else 1)
         )

+        if isinstance(controlnet, ControlNetUnionModel):
+            control_type = (
+                control_type.reshape(1, -1)
+                .to(self._execution_device, dtype=prompt_embeds.dtype)
+                .repeat(control_type_repeat_factor, 1)
+            )
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            control_type = [
+                _control_type.reshape(1, -1)
+                .to(self._execution_device, dtype=prompt_embeds.dtype)
+                .repeat(control_type_repeat_factor, 1)
+                for _control_type in control_type
+            ]
+
         # 8. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -717,7 +717,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Sta
        from diffusers import CycleDiffusionPipeline, DDIMScheduler

        # load the pipeline
-       # make sure you're logged in with `huggingface-cli login`
+       # make sure you're logged in with `hf auth login`
        model_id_or_path = "CompVis/stable-diffusion-v1-4"
        scheduler = DDIMScheduler.from_pretrained(model_id_or_path, subfolder="scheduler")
        pipe = CycleDiffusionPipeline.from_pretrained(model_id_or_path, scheduler=scheduler).to("cuda")
@@ -46,7 +46,9 @@ class DiTPipeline(DiffusionPipeline):

    Parameters:
        transformer ([`DiTTransformer2DModel`]):
-           A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents.
+           A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents. Initially published as
+           [`Transformer2DModel`](https://huggingface.co/facebook/DiT-XL-2-256/blob/main/transformer/config.json#L2)
+           in the config, but the mismatch can be ignored.
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
        scheduler ([`DDIMScheduler`]):
@@ -33,6 +33,8 @@ else:
     _import_structure["pipeline_flux_fill"] = ["FluxFillPipeline"]
     _import_structure["pipeline_flux_img2img"] = ["FluxImg2ImgPipeline"]
     _import_structure["pipeline_flux_inpaint"] = ["FluxInpaintPipeline"]
+    _import_structure["pipeline_flux_kontext"] = ["FluxKontextPipeline"]
+    _import_structure["pipeline_flux_kontext_inpaint"] = ["FluxKontextInpaintPipeline"]
     _import_structure["pipeline_flux_prior_redux"] = ["FluxPriorReduxPipeline"]
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -52,6 +54,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .pipeline_flux_fill import FluxFillPipeline
         from .pipeline_flux_img2img import FluxImg2ImgPipeline
         from .pipeline_flux_inpaint import FluxInpaintPipeline
+        from .pipeline_flux_kontext import FluxKontextPipeline
+        from .pipeline_flux_kontext_inpaint import FluxKontextInpaintPipeline
         from .pipeline_flux_prior_redux import FluxPriorReduxPipeline
 else:
     import sys
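
These entries register the new Flux Kontext pipelines added in this release. A minimal usage sketch, assuming the publicly released `black-forest-labs/FLUX.1-Kontext-dev` checkpoint; the repo id and image URL are placeholders, not part of this diff:

    import torch
    from diffusers import FluxKontextPipeline
    from diffusers.utils import load_image

    pipe = FluxKontextPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
    ).to("cuda")

    # Kontext edits an existing image according to the prompt.
    image = load_image("https://example.com/input.png")  # placeholder URL
    edited = pipe(image=image, prompt="make the sky stormy").images[0]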
@@ -310,7 +310,7 @@ class FluxPipeline(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -674,7 +674,8 @@ class FluxPipeline(
                 The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
                 `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders.
             true_cfg_scale (`float`, *optional*, defaults to 1.0):
-                When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
+                True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and
+                `negative_prompt` is provided.
             height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                 The height in pixels of the generated image. This is set to 1024 by default for the best results.
             width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -687,11 +688,11 @@ class FluxPipeline(
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 3.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
-                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
-                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
-                the text `prompt`, usually at the expense of lower image quality.
+                Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+                Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
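
The reworded docstrings separate the embedded (distilled) guidance driven by `guidance_scale` from true classifier-free guidance, which runs an extra negative-prompt pass once `true_cfg_scale` > 1 and a `negative_prompt` is given. A short sketch of both knobs on `FluxPipeline`; the checkpoint id is a common example, not taken from this diff:

    import torch
    from diffusers import FluxPipeline

    pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")

    # Embedded (distilled) guidance only: a single transformer pass per step.
    image = pipe(prompt="a watercolor fox", guidance_scale=3.5).images[0]

    # True CFG: needs a negative prompt and true_cfg_scale > 1, and roughly doubles
    # the transformer work per step (conditional + unconditional passes).
    image = pipe(
        prompt="a watercolor fox",
        negative_prompt="blurry, low quality",
        guidance_scale=3.5,
        true_cfg_scale=4.0,
    ).images[0]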
@@ -840,6 +841,8 @@ class FluxPipeline(

         # 5. Prepare timesteps
         sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps) if sigmas is None else sigmas
+        if hasattr(self.scheduler.config, "use_flow_sigmas") and self.scheduler.config.use_flow_sigmas:
+            sigmas = None
         image_seq_len = latents.shape[1]
         mu = calculate_shift(
             image_seq_len,
@@ -898,6 +901,8 @@ class FluxPipeline(
         )

         # 6. Denoising loop
+        # We set the index here to remove DtoH sync, helpful especially during compilation.
+        # Check out more details here: https://github.com/huggingface/diffusers/pull/11696
         self.scheduler.set_begin_index(0)
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
@@ -910,32 +915,35 @@ class FluxPipeline(
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0]).to(latents.dtype)

-                noise_pred = self.transformer(
-                    hidden_states=latents,
-                    timestep=timestep / 1000,
-                    guidance=guidance,
-                    pooled_projections=pooled_prompt_embeds,
-                    encoder_hidden_states=prompt_embeds,
-                    txt_ids=text_ids,
-                    img_ids=latent_image_ids,
-                    joint_attention_kwargs=self.joint_attention_kwargs,
-                    return_dict=False,
-                )[0]
-
-                if do_true_cfg:
-                    if negative_image_embeds is not None:
-                        self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
-                    neg_noise_pred = self.transformer(
+                with self.transformer.cache_context("cond"):
+                    noise_pred = self.transformer(
                         hidden_states=latents,
                         timestep=timestep / 1000,
                         guidance=guidance,
-                        pooled_projections=negative_pooled_prompt_embeds,
-                        encoder_hidden_states=negative_prompt_embeds,
-                        txt_ids=negative_text_ids,
+                        pooled_projections=pooled_prompt_embeds,
+                        encoder_hidden_states=prompt_embeds,
+                        txt_ids=text_ids,
                         img_ids=latent_image_ids,
                         joint_attention_kwargs=self.joint_attention_kwargs,
                         return_dict=False,
                     )[0]
+
+                if do_true_cfg:
+                    if negative_image_embeds is not None:
+                        self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
+
+                    with self.transformer.cache_context("uncond"):
+                        neg_noise_pred = self.transformer(
+                            hidden_states=latents,
+                            timestep=timestep / 1000,
+                            guidance=guidance,
+                            pooled_projections=negative_pooled_prompt_embeds,
+                            encoder_hidden_states=negative_prompt_embeds,
+                            txt_ids=negative_text_ids,
+                            img_ids=latent_image_ids,
+                            joint_attention_kwargs=self.joint_attention_kwargs,
+                            return_dict=False,
+                        )[0]
                     noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)

                 # compute the previous noisy sample x_t -> x_t-1
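
The new `cache_context("cond")` / `cache_context("uncond")` blocks label the two forward passes so that caching hooks can keep separate state for the conditional and negative branches. A hedged sketch of pairing this with the first-block cache introduced in this release (this assumes `FirstBlockCacheConfig` is importable from `diffusers.hooks`, as the file list above suggests; the threshold value is illustrative):

    from diffusers.hooks import FirstBlockCacheConfig

    # Skip recomputing the full transformer when the first block's output barely
    # changes between steps; cached results are tracked per cache_context.
    pipe.transformer.enable_cache(FirstBlockCacheConfig(threshold=0.2))
    image = pipe(prompt="a watercolor fox", guidance_scale=3.5).images[0]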
@@ -163,9 +163,9 @@ class FluxControlPipeline(
     TextualInversionLoaderMixin,
 ):
     r"""
-    The Flux pipeline for controllable text-to-image generation.
+    The Flux pipeline for controllable text-to-image generation with image conditions.

-    Reference: https://blackforestlabs.ai/announcing-black-forest-labs/
+    Reference: https://bfl.ai/flux-1-tools

     Args:
         transformer ([`FluxTransformer2DModel`]):
@@ -324,7 +324,7 @@ class FluxControlPipeline(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -661,11 +661,11 @@ class FluxControlPipeline(
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 3.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
-                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
-                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
-                the text `prompt`, usually at the expense of lower image quality.
+                Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with prompt at the expense of lower image quality.
+
+                Guidance-distilled models approximates true classifier-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -335,7 +335,7 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -374,7 +374,7 @@ class FluxControlInpaintPipeline(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -341,7 +341,7 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,