diffusers 0.30.3__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +97 -4
- diffusers/callbacks.py +56 -3
- diffusers/configuration_utils.py +13 -1
- diffusers/image_processor.py +282 -71
- diffusers/loaders/__init__.py +24 -3
- diffusers/loaders/ip_adapter.py +543 -16
- diffusers/loaders/lora_base.py +138 -125
- diffusers/loaders/lora_conversion_utils.py +647 -0
- diffusers/loaders/lora_pipeline.py +2216 -230
- diffusers/loaders/peft.py +380 -0
- diffusers/loaders/single_file_model.py +71 -4
- diffusers/loaders/single_file_utils.py +597 -10
- diffusers/loaders/textual_inversion.py +5 -3
- diffusers/loaders/transformer_flux.py +181 -0
- diffusers/loaders/transformer_sd3.py +89 -0
- diffusers/loaders/unet.py +56 -12
- diffusers/models/__init__.py +49 -12
- diffusers/models/activations.py +22 -9
- diffusers/models/adapter.py +53 -53
- diffusers/models/attention.py +98 -13
- diffusers/models/attention_flax.py +1 -1
- diffusers/models/attention_processor.py +2160 -346
- diffusers/models/autoencoders/__init__.py +5 -0
- diffusers/models/autoencoders/autoencoder_dc.py +620 -0
- diffusers/models/autoencoders/autoencoder_kl.py +73 -12
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +213 -105
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
- diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
- diffusers/models/autoencoders/vae.py +18 -5
- diffusers/models/controlnet.py +47 -802
- diffusers/models/controlnet_flux.py +70 -0
- diffusers/models/controlnet_sd3.py +26 -376
- diffusers/models/controlnet_sparsectrl.py +46 -719
- diffusers/models/controlnets/__init__.py +23 -0
- diffusers/models/controlnets/controlnet.py +872 -0
- diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
- diffusers/models/controlnets/controlnet_flux.py +536 -0
- diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
- diffusers/models/controlnets/controlnet_sd3.py +489 -0
- diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
- diffusers/models/controlnets/controlnet_union.py +832 -0
- diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
- diffusers/models/controlnets/multicontrolnet.py +183 -0
- diffusers/models/embeddings.py +996 -92
- diffusers/models/embeddings_flax.py +23 -9
- diffusers/models/model_loading_utils.py +264 -14
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +334 -51
- diffusers/models/normalization.py +157 -13
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +3 -2
- diffusers/models/transformers/cogvideox_transformer_3d.py +69 -13
- diffusers/models/transformers/dit_transformer_2d.py +1 -1
- diffusers/models/transformers/latte_transformer_3d.py +4 -4
- diffusers/models/transformers/pixart_transformer_2d.py +10 -2
- diffusers/models/transformers/sana_transformer.py +488 -0
- diffusers/models/transformers/stable_audio_transformer.py +1 -1
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +422 -0
- diffusers/models/transformers/transformer_cogview3plus.py +386 -0
- diffusers/models/transformers/transformer_flux.py +189 -51
- diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
- diffusers/models/transformers/transformer_ltx.py +469 -0
- diffusers/models/transformers/transformer_mochi.py +499 -0
- diffusers/models/transformers/transformer_sd3.py +112 -18
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +8 -1
- diffusers/models/unets/unet_2d_blocks.py +88 -21
- diffusers/models/unets/unet_2d_condition.py +9 -9
- diffusers/models/unets/unet_3d_blocks.py +9 -7
- diffusers/models/unets/unet_motion_model.py +46 -68
- diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
- diffusers/models/unets/unet_stable_cascade.py +2 -2
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +14 -6
- diffusers/pipelines/__init__.py +69 -6
- diffusers/pipelines/allegro/__init__.py +48 -0
- diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
- diffusers/pipelines/allegro/pipeline_output.py +23 -0
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +52 -22
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +3 -1
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -72
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +2 -9
- diffusers/pipelines/auto_pipeline.py +88 -10
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/cogvideo/__init__.py +2 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +80 -39
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +108 -50
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +89 -50
- diffusers/pipelines/cogview3/__init__.py +47 -0
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
- diffusers/pipelines/cogview3/pipeline_output.py +21 -0
- diffusers/pipelines/controlnet/__init__.py +86 -80
- diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +9 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +9 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +37 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +12 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +9 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +22 -4
- diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +56 -20
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +32 -9
- diffusers/pipelines/flux/__init__.py +23 -1
- diffusers/pipelines/flux/modeling_flux.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +256 -48
- diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
- diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
- diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
- diffusers/pipelines/flux/pipeline_output.py +16 -0
- diffusers/pipelines/free_noise_utils.py +365 -5
- diffusers/pipelines/hunyuan_video/__init__.py +48 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
- diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +20 -4
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
- diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
- diffusers/pipelines/kolors/text_encoder.py +2 -2
- diffusers/pipelines/kolors/tokenizer.py +4 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latte/pipeline_latte.py +2 -2
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
- diffusers/pipelines/ltx/__init__.py +50 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
- diffusers/pipelines/ltx/pipeline_output.py +20 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +3 -10
- diffusers/pipelines/mochi/__init__.py +48 -0
- diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
- diffusers/pipelines/mochi/pipeline_output.py +20 -0
- diffusers/pipelines/pag/__init__.py +13 -0
- diffusers/pipelines/pag/pag_utils.py +8 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +2 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +22 -6
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +7 -14
- diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
- diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +18 -9
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
- diffusers/pipelines/pia/pipeline_pia.py +2 -0
- diffusers/pipelines/pipeline_flax_utils.py +1 -1
- diffusers/pipelines/pipeline_loading_utils.py +250 -31
- diffusers/pipelines/pipeline_utils.py +158 -186
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +7 -14
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +7 -14
- diffusers/pipelines/sana/__init__.py +47 -0
- diffusers/pipelines/sana/pipeline_output.py +21 -0
- diffusers/pipelines/sana/pipeline_sana.py +884 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +46 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +228 -23
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +82 -13
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +60 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -22
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -22
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
- diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/quantizers/__init__.py +16 -0
- diffusers/quantizers/auto.py +139 -0
- diffusers/quantizers/base.py +233 -0
- diffusers/quantizers/bitsandbytes/__init__.py +2 -0
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
- diffusers/quantizers/bitsandbytes/utils.py +306 -0
- diffusers/quantizers/gguf/__init__.py +1 -0
- diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
- diffusers/quantizers/gguf/utils.py +456 -0
- diffusers/quantizers/quantization_config.py +669 -0
- diffusers/quantizers/torchao/__init__.py +15 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
- diffusers/schedulers/scheduling_ddim.py +4 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
- diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
- diffusers/schedulers/scheduling_ddpm.py +6 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +6 -7
- diffusers/schedulers/scheduling_deis_multistep.py +102 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +113 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +111 -5
- diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +126 -7
- diffusers/schedulers/scheduling_edm_euler.py +8 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
- diffusers/schedulers/scheduling_euler_discrete.py +92 -7
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
- diffusers/schedulers/scheduling_heun_discrete.py +114 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
- diffusers/schedulers/scheduling_lcm.py +2 -6
- diffusers/schedulers/scheduling_lms_discrete.py +76 -1
- diffusers/schedulers/scheduling_repaint.py +1 -1
- diffusers/schedulers/scheduling_sasolver.py +102 -6
- diffusers/schedulers/scheduling_tcd.py +2 -6
- diffusers/schedulers/scheduling_unclip.py +4 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +127 -5
- diffusers/training_utils.py +63 -19
- diffusers/utils/__init__.py +7 -1
- diffusers/utils/constants.py +1 -0
- diffusers/utils/dummy_pt_objects.py +240 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +435 -0
- diffusers/utils/dynamic_modules_utils.py +3 -3
- diffusers/utils/hub_utils.py +44 -40
- diffusers/utils/import_utils.py +98 -8
- diffusers/utils/loading_utils.py +28 -4
- diffusers/utils/peft_utils.py +6 -3
- diffusers/utils/testing_utils.py +115 -1
- diffusers/utils/torch_utils.py +3 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/METADATA +73 -72
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/RECORD +268 -193
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
@@ -141,9 +141,21 @@ def get_resize_crop_region_for_grid(src, tgt_size):
|
|
141
141
|
|
142
142
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
143
143
|
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
144
|
-
"""
|
145
|
-
|
146
|
-
|
144
|
+
r"""
|
145
|
+
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
146
|
+
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
147
|
+
Flawed](https://arxiv.org/pdf/2305.08891.pdf).
|
148
|
+
|
149
|
+
Args:
|
150
|
+
noise_cfg (`torch.Tensor`):
|
151
|
+
The predicted noise tensor for the guided diffusion process.
|
152
|
+
noise_pred_text (`torch.Tensor`):
|
153
|
+
The predicted noise tensor for the text-guided diffusion process.
|
154
|
+
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
155
|
+
A rescale factor applied to the noise predictions.
|
156
|
+
|
157
|
+
Returns:
|
158
|
+
noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
|
147
159
|
"""
|
148
160
|
std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
|
149
161
|
std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
|
@@ -225,6 +237,8 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
|
|
225
237
|
requires_safety_checker: bool = True,
|
226
238
|
):
|
227
239
|
super().__init__()
|
240
|
+
if isinstance(controlnet, (list, tuple)):
|
241
|
+
controlnet = HunyuanDiT2DMultiControlNetModel(controlnet)
|
228
242
|
|
229
243
|
self.register_modules(
|
230
244
|
vae=vae,
|
@@ -911,7 +925,11 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
|
|
911
925
|
base_size = 512 // 8 // self.transformer.config.patch_size
|
912
926
|
grid_crops_coords = get_resize_crop_region_for_grid((grid_height, grid_width), base_size)
|
913
927
|
image_rotary_emb = get_2d_rotary_pos_embed(
|
914
|
-
self.transformer.inner_dim // self.transformer.num_heads,
|
928
|
+
self.transformer.inner_dim // self.transformer.num_heads,
|
929
|
+
grid_crops_coords,
|
930
|
+
(grid_height, grid_width),
|
931
|
+
device=device,
|
932
|
+
output_type="pt",
|
915
933
|
)
|
916
934
|
|
917
935
|
style = torch.tensor([0], device=device)
|
@@ -23,6 +23,9 @@ except OptionalDependencyNotAvailable:
|
|
23
23
|
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
24
24
|
else:
|
25
25
|
_import_structure["pipeline_stable_diffusion_3_controlnet"] = ["StableDiffusion3ControlNetPipeline"]
|
26
|
+
_import_structure["pipeline_stable_diffusion_3_controlnet_inpainting"] = [
|
27
|
+
"StableDiffusion3ControlNetInpaintingPipeline"
|
28
|
+
]
|
26
29
|
|
27
30
|
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
28
31
|
try:
|
@@ -33,6 +36,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|
33
36
|
from ...utils.dummy_torch_and_transformers_objects import *
|
34
37
|
else:
|
35
38
|
from .pipeline_stable_diffusion_3_controlnet import StableDiffusion3ControlNetPipeline
|
39
|
+
from .pipeline_stable_diffusion_3_controlnet_inpainting import StableDiffusion3ControlNetInpaintingPipeline
|
36
40
|
|
37
41
|
try:
|
38
42
|
if not (is_transformers_available() and is_flax_available()):
|
@@ -26,7 +26,7 @@ from transformers import (
|
|
26
26
|
from ...image_processor import PipelineImageInput, VaeImageProcessor
|
27
27
|
from ...loaders import FromSingleFileMixin, SD3LoraLoaderMixin
|
28
28
|
from ...models.autoencoders import AutoencoderKL
|
29
|
-
from ...models.controlnet_sd3 import SD3ControlNetModel, SD3MultiControlNetModel
|
29
|
+
from ...models.controlnets.controlnet_sd3 import SD3ControlNetModel, SD3MultiControlNetModel
|
30
30
|
from ...models.transformers import SD3Transformer2DModel
|
31
31
|
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
32
32
|
from ...utils import (
|
@@ -66,9 +66,13 @@ EXAMPLE_DOC_STRING = """
|
|
66
66
|
... "stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
|
67
67
|
... )
|
68
68
|
>>> pipe.to("cuda")
|
69
|
-
>>> control_image = load_image(
|
70
|
-
|
71
|
-
|
69
|
+
>>> control_image = load_image(
|
70
|
+
... "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
|
71
|
+
... )
|
72
|
+
>>> prompt = "A bird in space"
|
73
|
+
>>> image = pipe(
|
74
|
+
... prompt, control_image=control_image, height=1024, width=768, controlnet_conditioning_scale=0.7
|
75
|
+
... ).images[0]
|
72
76
|
>>> image.save("sd3.png")
|
73
77
|
```
|
74
78
|
"""
|
@@ -83,7 +87,7 @@ def retrieve_timesteps(
|
|
83
87
|
sigmas: Optional[List[float]] = None,
|
84
88
|
**kwargs,
|
85
89
|
):
|
86
|
-
"""
|
90
|
+
r"""
|
87
91
|
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
|
88
92
|
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
|
89
93
|
|
@@ -192,6 +196,21 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
192
196
|
],
|
193
197
|
):
|
194
198
|
super().__init__()
|
199
|
+
if isinstance(controlnet, (list, tuple)):
|
200
|
+
controlnet = SD3MultiControlNetModel(controlnet)
|
201
|
+
if isinstance(controlnet, SD3MultiControlNetModel):
|
202
|
+
for controlnet_model in controlnet.nets:
|
203
|
+
# for SD3.5 8b controlnet, it shares the pos_embed with the transformer
|
204
|
+
if (
|
205
|
+
hasattr(controlnet_model.config, "use_pos_embed")
|
206
|
+
and controlnet_model.config.use_pos_embed is False
|
207
|
+
):
|
208
|
+
pos_embed = controlnet_model._get_pos_embed_from_transformer(transformer)
|
209
|
+
controlnet_model.pos_embed = pos_embed.to(controlnet_model.dtype).to(controlnet_model.device)
|
210
|
+
elif isinstance(controlnet, SD3ControlNetModel):
|
211
|
+
if hasattr(controlnet.config, "use_pos_embed") and controlnet.config.use_pos_embed is False:
|
212
|
+
pos_embed = controlnet._get_pos_embed_from_transformer(transformer)
|
213
|
+
controlnet.pos_embed = pos_embed.to(controlnet.dtype).to(controlnet.device)
|
195
214
|
|
196
215
|
self.register_modules(
|
197
216
|
vae=vae,
|
@@ -718,7 +737,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
718
737
|
height: Optional[int] = None,
|
719
738
|
width: Optional[int] = None,
|
720
739
|
num_inference_steps: int = 28,
|
721
|
-
|
740
|
+
sigmas: Optional[List[float]] = None,
|
722
741
|
guidance_scale: float = 7.0,
|
723
742
|
control_guidance_start: Union[float, List[float]] = 0.0,
|
724
743
|
control_guidance_end: Union[float, List[float]] = 1.0,
|
@@ -763,10 +782,10 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
763
782
|
num_inference_steps (`int`, *optional*, defaults to 50):
|
764
783
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
765
784
|
expense of slower inference.
|
766
|
-
|
767
|
-
Custom
|
768
|
-
|
769
|
-
|
785
|
+
sigmas (`List[float]`, *optional*):
|
786
|
+
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
|
787
|
+
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
788
|
+
will be used.
|
770
789
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
771
790
|
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
772
791
|
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
@@ -856,6 +875,12 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
856
875
|
height = height or self.default_sample_size * self.vae_scale_factor
|
857
876
|
width = width or self.default_sample_size * self.vae_scale_factor
|
858
877
|
|
878
|
+
controlnet_config = (
|
879
|
+
self.controlnet.config
|
880
|
+
if isinstance(self.controlnet, SD3ControlNetModel)
|
881
|
+
else self.controlnet.nets[0].config
|
882
|
+
)
|
883
|
+
|
859
884
|
# align format for control guidance
|
860
885
|
if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
|
861
886
|
control_guidance_start = len(control_guidance_end) * [control_guidance_start]
|
@@ -930,6 +955,11 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
930
955
|
pooled_prompt_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0)
|
931
956
|
|
932
957
|
# 3. Prepare control image
|
958
|
+
if controlnet_config.force_zeros_for_pooled_projection:
|
959
|
+
# instantx sd3 controlnet does not apply shift factor
|
960
|
+
vae_shift_factor = 0
|
961
|
+
else:
|
962
|
+
vae_shift_factor = self.vae.config.shift_factor
|
933
963
|
if isinstance(self.controlnet, SD3ControlNetModel):
|
934
964
|
control_image = self.prepare_image(
|
935
965
|
image=control_image,
|
@@ -945,8 +975,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
945
975
|
height, width = control_image.shape[-2:]
|
946
976
|
|
947
977
|
control_image = self.vae.encode(control_image).latent_dist.sample()
|
948
|
-
control_image = control_image * self.vae.config.scaling_factor
|
949
|
-
|
978
|
+
control_image = (control_image - vae_shift_factor) * self.vae.config.scaling_factor
|
950
979
|
elif isinstance(self.controlnet, SD3MultiControlNetModel):
|
951
980
|
control_images = []
|
952
981
|
|
@@ -964,7 +993,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
964
993
|
)
|
965
994
|
|
966
995
|
control_image_ = self.vae.encode(control_image_).latent_dist.sample()
|
967
|
-
control_image_ = control_image_ * self.vae.config.scaling_factor
|
996
|
+
control_image_ = (control_image_ - vae_shift_factor) * self.vae.config.scaling_factor
|
968
997
|
|
969
998
|
control_images.append(control_image_)
|
970
999
|
|
@@ -972,13 +1001,8 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
972
1001
|
else:
|
973
1002
|
assert False
|
974
1003
|
|
975
|
-
if controlnet_pooled_projections is None:
|
976
|
-
controlnet_pooled_projections = torch.zeros_like(pooled_prompt_embeds)
|
977
|
-
else:
|
978
|
-
controlnet_pooled_projections = controlnet_pooled_projections or pooled_prompt_embeds
|
979
|
-
|
980
1004
|
# 4. Prepare timesteps
|
981
|
-
timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device,
|
1005
|
+
timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, sigmas=sigmas)
|
982
1006
|
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
|
983
1007
|
self._num_timesteps = len(timesteps)
|
984
1008
|
|
@@ -1004,6 +1028,18 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
1004
1028
|
]
|
1005
1029
|
controlnet_keep.append(keeps[0] if isinstance(self.controlnet, SD3ControlNetModel) else keeps)
|
1006
1030
|
|
1031
|
+
if controlnet_config.force_zeros_for_pooled_projection:
|
1032
|
+
# instantx sd3 controlnet used zero pooled projection
|
1033
|
+
controlnet_pooled_projections = torch.zeros_like(pooled_prompt_embeds)
|
1034
|
+
else:
|
1035
|
+
controlnet_pooled_projections = controlnet_pooled_projections or pooled_prompt_embeds
|
1036
|
+
|
1037
|
+
if controlnet_config.joint_attention_dim is not None:
|
1038
|
+
controlnet_encoder_hidden_states = prompt_embeds
|
1039
|
+
else:
|
1040
|
+
# SD35 official 8b controlnet does not use encoder_hidden_states
|
1041
|
+
controlnet_encoder_hidden_states = None
|
1042
|
+
|
1007
1043
|
# 7. Denoising loop
|
1008
1044
|
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
1009
1045
|
for i, t in enumerate(timesteps):
|
@@ -1027,7 +1063,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
1027
1063
|
control_block_samples = self.controlnet(
|
1028
1064
|
hidden_states=latent_model_input,
|
1029
1065
|
timestep=timestep,
|
1030
|
-
encoder_hidden_states=
|
1066
|
+
encoder_hidden_states=controlnet_encoder_hidden_states,
|
1031
1067
|
pooled_projections=controlnet_pooled_projections,
|
1032
1068
|
joint_attention_kwargs=self.joint_attention_kwargs,
|
1033
1069
|
controlnet_cond=control_image,
|