diffusers 0.34.0__py3-none-any.whl → 0.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +98 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +2 -0
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_table.py +3 -3
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +7 -6
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +292 -286
- diffusers/hooks/hooks.py +56 -1
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +2 -7
- diffusers/hooks/pyramid_attention_broadcast.py +14 -11
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +255 -4
- diffusers/loaders/lora_base.py +63 -30
- diffusers/loaders/lora_conversion_utils.py +434 -53
- diffusers/loaders/lora_pipeline.py +834 -37
- diffusers/loaders/peft.py +28 -5
- diffusers/loaders/single_file_model.py +44 -11
- diffusers/loaders/single_file_utils.py +170 -2
- diffusers/loaders/transformer_flux.py +9 -10
- diffusers/loaders/transformer_sd3.py +6 -1
- diffusers/loaders/unet.py +22 -5
- diffusers/loaders/unet_loader_utils.py +5 -2
- diffusers/models/__init__.py +8 -0
- diffusers/models/attention.py +484 -3
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_processor.py +105 -663
- diffusers/models/auto_model.py +2 -2
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_dc.py +14 -1
- diffusers/models/autoencoders/autoencoder_kl.py +1 -1
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
- diffusers/models/cache_utils.py +31 -9
- diffusers/models/controlnets/controlnet_flux.py +5 -5
- diffusers/models/controlnets/controlnet_union.py +4 -4
- diffusers/models/embeddings.py +26 -34
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +159 -94
- diffusers/models/transformers/__init__.py +2 -0
- diffusers/models/transformers/transformer_chroma.py +16 -117
- diffusers/models/transformers/transformer_cogview4.py +36 -2
- diffusers/models/transformers/transformer_cosmos.py +11 -4
- diffusers/models/transformers/transformer_flux.py +372 -132
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
- diffusers/models/transformers/transformer_ltx.py +104 -23
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_wan.py +298 -85
- diffusers/models/transformers/transformer_wan_vace.py +15 -21
- diffusers/models/unets/unet_2d_condition.py +2 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +31 -0
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
- diffusers/pipelines/auto_pipeline.py +17 -13
- diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
- diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +3 -1
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/pipeline_flux.py +34 -26
- diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
- diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
- diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
- diffusers/pipelines/pipeline_flax_utils.py +2 -2
- diffusers/pipelines/pipeline_loading_utils.py +24 -2
- diffusers/pipelines/pipeline_utils.py +22 -15
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +849 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
- diffusers/pipelines/wan/pipeline_wan.py +78 -20
- diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
- diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
- diffusers/quantizers/__init__.py +1 -177
- diffusers/quantizers/base.py +11 -0
- diffusers/quantizers/gguf/utils.py +92 -3
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
- diffusers/schedulers/scheduling_deis_multistep.py +8 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
- diffusers/schedulers/scheduling_scm.py +0 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
- diffusers/schedulers/scheduling_utils.py +2 -2
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/training_utils.py +78 -0
- diffusers/utils/__init__.py +10 -0
- diffusers/utils/constants.py +4 -0
- diffusers/utils/dummy_pt_objects.py +312 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
- diffusers/utils/dynamic_modules_utils.py +84 -25
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +70 -0
- diffusers/utils/peft_utils.py +11 -8
- diffusers/utils/testing_utils.py +136 -10
- diffusers/utils/torch_utils.py +18 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/METADATA +6 -6
- {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/RECORD +191 -127
- {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/LICENSE +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/WHEEL +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/top_level.txt +0 -0
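Among the larger additions in 0.35.x are the `guiders` and `modular_pipelines` packages, the Flux Kontext pipelines, and the QwenImage and SkyReels-V2 pipeline families listed above. As a quick orientation, a minimal, hedged sketch of loading one of the new pipelines follows; `QwenImagePipeline` is the class name expected to match the new `pipelines/qwenimage` module, while the checkpoint id and dtype below are illustrative assumptions, not taken from this diff.

```python
# Hedged sketch: loading one of the pipeline families added in this release.
# The Hub checkpoint id and dtype are assumptions, not part of this diff.
import torch
from diffusers import QwenImagePipeline

pipe = QwenImagePipeline.from_pretrained(
    "Qwen/Qwen-Image",           # assumed checkpoint id
    torch_dtype=torch.bfloat16,  # assumed dtype
)
pipe.to("cuda")

image = pipe(prompt="a watercolor painting of a lighthouse at dusk").images[0]
```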
diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py

@@ -19,7 +19,6 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import numpy as np
 import PIL.Image
 import torch
-import torch.nn.functional as F
 from transformers import (
     CLIPImageProcessor,
     CLIPTextModel,
@@ -38,7 +37,13 @@ from ...loaders import (
     StableDiffusionXLLoraLoaderMixin,
     TextualInversionLoaderMixin,
 )
-from ...models import
+from ...models import (
+    AutoencoderKL,
+    ControlNetUnionModel,
+    ImageProjection,
+    MultiControlNetUnionModel,
+    UNet2DConditionModel,
+)
 from ...models.attention_processor import (
     AttnProcessor2_0,
     XFormersAttnProcessor,
@@ -262,7 +267,9 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         tokenizer: CLIPTokenizer,
         tokenizer_2: CLIPTokenizer,
         unet: UNet2DConditionModel,
-        controlnet:
+        controlnet: Union[
+            ControlNetUnionModel, List[ControlNetUnionModel], Tuple[ControlNetUnionModel], MultiControlNetUnionModel
+        ],
         scheduler: KarrasDiffusionSchedulers,
         requires_aesthetics_score: bool = False,
         force_zeros_for_empty_prompt: bool = True,
@@ -272,8 +279,8 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
     ):
         super().__init__()

+        if isinstance(controlnet, (list, tuple)):
+            controlnet = MultiControlNetUnionModel(controlnet)

         self.register_modules(
             vae=vae,
@@ -649,6 +656,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         controlnet_conditioning_scale=1.0,
         control_guidance_start=0.0,
         control_guidance_end=1.0,
+        control_mode=None,
         callback_on_step_end_tensor_inputs=None,
     ):
         if strength < 0 or strength > 1:
@@ -722,28 +730,44 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
                 "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
             )

+        # `prompt` needs more sophisticated handling when there are multiple
+        # conditionings.
+        if isinstance(self.controlnet, MultiControlNetUnionModel):
+            if isinstance(prompt, list):
+                logger.warning(
+                    f"You have {len(self.controlnet.nets)} ControlNets and you have passed {len(prompt)}"
+                    " prompts. The conditionings will be fixed across the prompts."
+                )
+
         # Check `image`
+        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
+
+        if isinstance(controlnet, ControlNetUnionModel):
+            for image_ in image:
+                self.check_image(image_, prompt, prompt_embeds)
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            if not isinstance(image, list):
+                raise TypeError("For multiple controlnets: `image` must be type `list`")
+            elif not all(isinstance(i, list) for i in image):
+                raise ValueError("For multiple controlnets: elements of `image` must be list of conditionings.")
+            elif len(image) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
+                )
+
+            for images_ in image:
+                for image_ in images_:
+                    self.check_image(image_, prompt, prompt_embeds)

         if not isinstance(control_guidance_start, (tuple, list)):
             control_guidance_start = [control_guidance_start]

+        if isinstance(controlnet, MultiControlNetUnionModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
         if not isinstance(control_guidance_end, (tuple, list)):
             control_guidance_end = [control_guidance_end]

@@ -762,6 +786,15 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
             if end > 1.0:
                 raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")

+        # Check `control_mode`
+        if isinstance(controlnet, ControlNetUnionModel):
+            if max(control_mode) >= controlnet.config.num_control_type:
+                raise ValueError(f"control_mode: must be lower than {controlnet.config.num_control_type}.")
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            for _control_mode, _controlnet in zip(control_mode, self.controlnet.nets):
+                if max(_control_mode) >= _controlnet.config.num_control_type:
+                    raise ValueError(f"control_mode: must be lower than {_controlnet.config.num_control_type}.")
+
         if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
             raise ValueError(
                 "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
@@ -1049,7 +1082,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         prompt: Union[str, List[str]] = None,
         prompt_2: Optional[Union[str, List[str]]] = None,
         image: PipelineImageInput = None,
-        control_image: PipelineImageInput = None,
+        control_image: Union[PipelineImageInput, List[PipelineImageInput]] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
         strength: float = 0.8,
@@ -1074,7 +1107,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         guess_mode: bool = False,
         control_guidance_start: Union[float, List[float]] = 0.0,
         control_guidance_end: Union[float, List[float]] = 1.0,
-        control_mode: Optional[Union[int, List[int]]] = None,
+        control_mode: Optional[Union[int, List[int], List[List[int]]]] = None,
         original_size: Tuple[int, int] = None,
         crops_coords_top_left: Tuple[int, int] = (0, 0),
         target_size: Tuple[int, int] = None,
@@ -1104,13 +1137,13 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
                 `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The initial image will be used as the starting point for the image generation process. Can also accept
                 image latents as `image`, if passing latents directly, it will not be encoded again.
-            control_image (`PipelineImageInput`):
-                The ControlNet input condition
+            control_image (`PipelineImageInput` or `List[PipelineImageInput]`, *optional*):
+                The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
+                specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
+                as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
+                width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
+                images must be passed as a list such that each element of the list can be correctly batched for input
+                to a single ControlNet.
             height (`int`, *optional*, defaults to the size of control_image):
                 The height in pixels of the generated image. Anything below 512 pixels won't work well for
                 [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
@@ -1184,16 +1217,21 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
                 `self.processor` in
                 [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
-                The outputs of the
-                to the residual in the original unet
-                corresponding scale as a list.
+                The outputs of the ControlNet are multiplied by `controlnet_conditioning_scale` before they are added
+                to the residual in the original `unet`. If multiple ControlNets are specified in `init`, you can set
+                the corresponding scale as a list.
             guess_mode (`bool`, *optional*, defaults to `False`):
                 In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
                 you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
             control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
-                The percentage of total steps at which the
+                The percentage of total steps at which the ControlNet starts applying.
             control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
-                The percentage of total steps at which the
+                The percentage of total steps at which the ControlNet stops applying.
+            control_mode (`int` or `List[int]` or `List[List[int]], *optional*):
+                The control condition types for the ControlNet. See the ControlNet's model card forinformation on the
+                available control modes. If multiple ControlNets are specified in `init`, control_mode should be a list
+                where each ControlNet should have its corresponding control mode list. Should reflect the order of
+                conditions in control_image
             original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                 If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
                 `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
@@ -1273,12 +1311,6 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(

         controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet

-        # align format for control guidance
-        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
-            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
-        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
-            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
-
         if not isinstance(control_image, list):
             control_image = [control_image]
         else:
@@ -1287,37 +1319,56 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         if not isinstance(control_mode, list):
             control_mode = [control_mode]

+        if isinstance(controlnet, MultiControlNetUnionModel):
+            control_image = [[item] for item in control_image]
+            control_mode = [[item] for item in control_mode]

+        # align format for control guidance
+        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetUnionModel) else len(control_mode)
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )
+
+        if isinstance(controlnet_conditioning_scale, float):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetUnionModel) else len(control_mode)
+            controlnet_conditioning_scale = [controlnet_conditioning_scale] * mult

         # 1. Check inputs
-            callback_on_step_end_tensor_inputs,
-        )
+        self.check_inputs(
+            prompt,
+            prompt_2,
+            control_image,
+            strength,
+            num_inference_steps,
+            callback_steps,
+            negative_prompt,
+            negative_prompt_2,
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
+            ip_adapter_image,
+            ip_adapter_image_embeds,
+            controlnet_conditioning_scale,
+            control_guidance_start,
+            control_guidance_end,
+            control_mode,
+            callback_on_step_end_tensor_inputs,
+        )

+        if isinstance(controlnet, ControlNetUnionModel):
+            control_type = torch.zeros(controlnet.config.num_control_type).scatter_(0, torch.tensor(control_mode), 1)
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            control_type = [
+                torch.zeros(controlnet_.config.num_control_type).scatter_(0, torch.tensor(control_mode_), 1)
+                for control_mode_, controlnet_ in zip(control_mode, self.controlnet.nets)
+            ]

         self._guidance_scale = guidance_scale
         self._clip_skip = clip_skip
@@ -1334,7 +1385,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(

         device = self._execution_device

-        global_pool_conditions =
+        global_pool_conditions = (
+            controlnet.config.global_pool_conditions
+            if isinstance(controlnet, ControlNetUnionModel)
+            else controlnet.nets[0].config.global_pool_conditions
+        )
         guess_mode = guess_mode or global_pool_conditions

         # 3.1. Encode input prompt
@@ -1372,22 +1427,55 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
             self.do_classifier_free_guidance,
         )

-        # 4. Prepare image
+        # 4.1 Prepare image
         image = self.image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)

+        # 4.2 Prepare control images
+        if isinstance(controlnet, ControlNetUnionModel):
+            control_images = []
+
+            for image_ in control_image:
+                image_ = self.prepare_control_image(
+                    image=image_,
+                    width=width,
+                    height=height,
+                    batch_size=batch_size * num_images_per_prompt,
+                    num_images_per_prompt=num_images_per_prompt,
+                    device=device,
+                    dtype=controlnet.dtype,
+                    do_classifier_free_guidance=self.do_classifier_free_guidance,
+                    guess_mode=guess_mode,
+                )
+
+                control_images.append(image_)
+
+            control_image = control_images
+            height, width = control_image[0].shape[-2:]
+
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            control_images = []
+
+            for control_image_ in control_image:
+                images = []
+
+                for image_ in control_image_:
+                    image_ = self.prepare_control_image(
+                        image=image_,
+                        width=width,
+                        height=height,
+                        batch_size=batch_size * num_images_per_prompt,
+                        num_images_per_prompt=num_images_per_prompt,
+                        device=device,
+                        dtype=controlnet.dtype,
+                        do_classifier_free_guidance=self.do_classifier_free_guidance,
+                        guess_mode=guess_mode,
+                    )
+
+                    images.append(image_)
+                control_images.append(images)
+
+            control_image = control_images
+            height, width = control_image[0][0].shape[-2:]

         # 5. Prepare timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
@@ -1414,10 +1502,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         # 7.1 Create tensor stating which controlnets to keep
         controlnet_keep = []
         for i in range(len(timesteps)):
+            keeps = [
+                1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+                for s, e in zip(control_guidance_start, control_guidance_end)
+            ]
+            controlnet_keep.append(keeps)

         # 7.2 Prepare added time ids & embeddings
         original_size = original_size or (height, width)
@@ -1460,12 +1549,25 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
         prompt_embeds = prompt_embeds.to(device)
         add_text_embeds = add_text_embeds.to(device)
         add_time_ids = add_time_ids.to(device)
-            .repeat(batch_size * num_images_per_prompt * 2, 1)
+
+        control_type_repeat_factor = (
+            batch_size * num_images_per_prompt * (2 if self.do_classifier_free_guidance else 1)
         )

+        if isinstance(controlnet, ControlNetUnionModel):
+            control_type = (
+                control_type.reshape(1, -1)
+                .to(self._execution_device, dtype=prompt_embeds.dtype)
+                .repeat(control_type_repeat_factor, 1)
+            )
+        elif isinstance(controlnet, MultiControlNetUnionModel):
+            control_type = [
+                _control_type.reshape(1, -1)
+                .to(self._execution_device, dtype=prompt_embeds.dtype)
+                .repeat(control_type_repeat_factor, 1)
+                for _control_type in control_type
+            ]

         # 8. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
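The constructor and `check_inputs` changes above extend the SDXL ControlNet-Union img2img pipeline from a single `ControlNetUnionModel` to an optional list of them, which `__init__` now wraps into a `MultiControlNetUnionModel`. A minimal sketch of the new construction path follows, with placeholder checkpoint ids; the call-time conventions for `control_image` and `control_mode` are the ones described in the updated docstring above.

```python
# Sketch of the new multi-ControlNet-Union construction path added in this diff.
# Checkpoint ids are placeholders; substitute the models you actually use.
import torch
from diffusers import ControlNetUnionModel, StableDiffusionXLControlNetUnionImg2ImgPipeline

controlnets = [
    ControlNetUnionModel.from_pretrained("xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16),
    ControlNetUnionModel.from_pretrained("xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16),
]

pipe = StableDiffusionXLControlNetUnionImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnets,  # a list/tuple is wrapped into MultiControlNetUnionModel by __init__
    torch_dtype=torch.float16,
)

# With multiple ControlNets, `control_image` takes one conditioning entry per ControlNet and
# `control_mode` one control-type index per ControlNet, in the same order (see check_inputs above).
```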
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py

@@ -717,7 +717,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Sta
         from diffusers import CycleDiffusionPipeline, DDIMScheduler

         # load the pipeline
-        # make sure you're logged in with `
+        # make sure you're logged in with `hf auth login`
         model_id_or_path = "CompVis/stable-diffusion-v1-4"
         scheduler = DDIMScheduler.from_pretrained(model_id_or_path, subfolder="scheduler")
         pipe = CycleDiffusionPipeline.from_pretrained(model_id_or_path, scheduler=scheduler).to("cuda")
diffusers/pipelines/dit/pipeline_dit.py

@@ -46,7 +46,9 @@ class DiTPipeline(DiffusionPipeline):

     Parameters:
         transformer ([`DiTTransformer2DModel`]):
-            A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents.
+            A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents. Initially published as
+            [`Transformer2DModel`](https://huggingface.co/facebook/DiT-XL-2-256/blob/main/transformer/config.json#L2)
+            in the config, but the mismatch can be ignored.
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
         scheduler ([`DDIMScheduler`]):
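The expanded docstring notes that the reference checkpoint's config registers the transformer as `Transformer2DModel` while the pipeline types it as `DiTTransformer2DModel`, and that the resulting mismatch warning can be ignored. A short, hedged loading sketch; the repository id comes from the docstring link, while the label and step count are illustrative.

```python
# Hedged sketch: loading DiTPipeline. from_pretrained may warn that the checkpoint's
# transformer was saved as `Transformer2DModel`; per the docstring above, that mismatch
# is expected and can be ignored.
import torch
from diffusers import DiTPipeline

pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256", torch_dtype=torch.float16)
pipe.to("cuda")

# DiT is class-conditional on ImageNet labels; get_label_ids maps label names to class ids.
class_ids = pipe.get_label_ids(["golden retriever"])
image = pipe(class_labels=class_ids, num_inference_steps=25).images[0]
```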
diffusers/pipelines/flux/__init__.py

@@ -33,6 +33,8 @@ else:
     _import_structure["pipeline_flux_fill"] = ["FluxFillPipeline"]
     _import_structure["pipeline_flux_img2img"] = ["FluxImg2ImgPipeline"]
     _import_structure["pipeline_flux_inpaint"] = ["FluxInpaintPipeline"]
+    _import_structure["pipeline_flux_kontext"] = ["FluxKontextPipeline"]
+    _import_structure["pipeline_flux_kontext_inpaint"] = ["FluxKontextInpaintPipeline"]
     _import_structure["pipeline_flux_prior_redux"] = ["FluxPriorReduxPipeline"]
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -52,6 +54,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .pipeline_flux_fill import FluxFillPipeline
         from .pipeline_flux_img2img import FluxImg2ImgPipeline
         from .pipeline_flux_inpaint import FluxInpaintPipeline
+        from .pipeline_flux_kontext import FluxKontextPipeline
+        from .pipeline_flux_kontext_inpaint import FluxKontextInpaintPipeline
         from .pipeline_flux_prior_redux import FluxPriorReduxPipeline
     else:
         import sys
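With the two entries above, the Kontext pipelines participate in the package's lazy-import machinery like the other Flux pipelines, so they can be imported from `diffusers.pipelines.flux` (or from the library root, which the larger `diffusers/__init__.py` change in this release suggests). A hedged usage sketch follows; the checkpoint id, the image URL, and the editing-call signature are assumptions, not taken from this diff.

```python
# Hedged sketch: using the newly registered FluxKontextPipeline for image editing.
# Checkpoint id, image URL, and call signature are assumptions.
import torch
from diffusers.pipelines.flux import FluxKontextPipeline
from diffusers.utils import load_image

pipe = FluxKontextPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev",  # assumed checkpoint id
    torch_dtype=torch.bfloat16,
)
pipe.to("cuda")

init_image = load_image("https://example.com/cat.png")  # placeholder URL
edited = pipe(image=init_image, prompt="make the cat wear a tiny wizard hat").images[0]
```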
diffusers/pipelines/flux/pipeline_flux.py

@@ -310,7 +310,7 @@ class FluxPipeline(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -674,7 +674,8 @@ class FluxPipeline(
                 The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
                 `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders.
             true_cfg_scale (`float`, *optional*, defaults to 1.0):
+                True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and
+                `negative_prompt` is provided.
             height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                 The height in pixels of the generated image. This is set to 1024 by default for the best results.
             width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -687,11 +688,11 @@ class FluxPipeline(
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 3.5):
+                Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+                Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -840,6 +841,8 @@ class FluxPipeline(

         # 5. Prepare timesteps
         sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps) if sigmas is None else sigmas
+        if hasattr(self.scheduler.config, "use_flow_sigmas") and self.scheduler.config.use_flow_sigmas:
+            sigmas = None
         image_seq_len = latents.shape[1]
         mu = calculate_shift(
             image_seq_len,
@@ -898,6 +901,8 @@ class FluxPipeline(
         )

         # 6. Denoising loop
+        # We set the index here to remove DtoH sync, helpful especially during compilation.
+        # Check out more details here: https://github.com/huggingface/diffusers/pull/11696
         self.scheduler.set_begin_index(0)
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
@@ -910,32 +915,35 @@ class FluxPipeline(
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0]).to(latents.dtype)

-                    timestep=timestep / 1000,
-                    guidance=guidance,
-                    pooled_projections=pooled_prompt_embeds,
-                    encoder_hidden_states=prompt_embeds,
-                    txt_ids=text_ids,
-                    img_ids=latent_image_ids,
-                    joint_attention_kwargs=self.joint_attention_kwargs,
-                    return_dict=False,
-                )[0]
-                if do_true_cfg:
-                    if negative_image_embeds is not None:
-                        self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
-                    neg_noise_pred = self.transformer(
+                with self.transformer.cache_context("cond"):
+                    noise_pred = self.transformer(
                         hidden_states=latents,
                         timestep=timestep / 1000,
                         guidance=guidance,
-                        pooled_projections=
-                        encoder_hidden_states=
-                        txt_ids=
+                        pooled_projections=pooled_prompt_embeds,
+                        encoder_hidden_states=prompt_embeds,
+                        txt_ids=text_ids,
                         img_ids=latent_image_ids,
                         joint_attention_kwargs=self.joint_attention_kwargs,
                         return_dict=False,
                     )[0]
+
+                if do_true_cfg:
+                    if negative_image_embeds is not None:
+                        self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
+
+                    with self.transformer.cache_context("uncond"):
+                        neg_noise_pred = self.transformer(
+                            hidden_states=latents,
+                            timestep=timestep / 1000,
+                            guidance=guidance,
+                            pooled_projections=negative_pooled_prompt_embeds,
+                            encoder_hidden_states=negative_prompt_embeds,
+                            txt_ids=negative_text_ids,
+                            img_ids=latent_image_ids,
+                            joint_attention_kwargs=self.joint_attention_kwargs,
+                            return_dict=False,
+                        )[0]
                     noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)

                 # compute the previous noisy sample x_t -> x_t-1
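The docstring hunks above spell out FluxPipeline's two guidance knobs: `guidance_scale` drives the embedded (distilled) guidance, while `true_cfg_scale` > 1 together with a `negative_prompt` triggers the second, `"uncond"` transformer pass shown in the denoising-loop hunk. A short sketch of both knobs; the checkpoint id is a placeholder.

```python
# Sketch: the two guidance knobs documented above. The checkpoint id is a placeholder.
import torch
from diffusers import FluxPipeline

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
pipe.to("cuda")

image = pipe(
    prompt="a macro photo of a dew-covered spider web",
    negative_prompt="blurry, low quality",  # needed for true CFG to take effect
    guidance_scale=3.5,                     # embedded guidance of the distilled model
    true_cfg_scale=4.0,                     # > 1 enables the extra unconditional pass
    num_inference_steps=28,
).images[0]
```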
diffusers/pipelines/flux/pipeline_flux_control.py

@@ -163,9 +163,9 @@ class FluxControlPipeline(
     TextualInversionLoaderMixin,
 ):
     r"""
-    The Flux pipeline for controllable text-to-image generation.
+    The Flux pipeline for controllable text-to-image generation with image conditions.

-    Reference: https://
+    Reference: https://bfl.ai/flux-1-tools

     Args:
         transformer ([`FluxTransformer2DModel`]):
@@ -324,7 +324,7 @@ class FluxControlPipeline(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -661,11 +661,11 @@ class FluxControlPipeline(
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 3.5):
+                Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with prompt at the expense of lower image quality.
+
+                Guidance-distilled models approximates true classifier-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
diffusers/pipelines/flux/pipeline_flux_control_img2img.py

@@ -335,7 +335,7 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
diffusers/pipelines/flux/pipeline_flux_control_inpaint.py

@@ -374,7 +374,7 @@ class FluxControlInpaintPipeline(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
diffusers/pipelines/flux/pipeline_flux_controlnet.py

@@ -341,7 +341,7 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
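The last four hunks make `prompt_2` an optional keyword in `encode_prompt` across the Flux control pipelines, matching the change to `FluxPipeline` above; internally the method falls back to `prompt` when `prompt_2` is not given. A rough sketch against `FluxControlPipeline` (the checkpoint id is a placeholder, and the three-tuple return mirrors the existing Flux `encode_prompt` behavior):

```python
# Sketch: after this change, encode_prompt no longer requires prompt_2.
# The checkpoint id is a placeholder.
import torch
from diffusers import FluxControlPipeline

pipe = FluxControlPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Canny-dev",  # placeholder checkpoint id
    torch_dtype=torch.bfloat16,
)
pipe.to("cuda")

prompt_embeds, pooled_prompt_embeds, text_ids = pipe.encode_prompt(
    prompt="a robot made of driftwood",  # prompt_2 defaults to None and falls back to prompt
    num_images_per_prompt=1,
)
```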
|