diffusers 0.29.2__py3-none-any.whl → 0.30.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- diffusers/__init__.py +94 -3
- diffusers/commands/env.py +1 -5
- diffusers/configuration_utils.py +4 -9
- diffusers/dependency_versions_table.py +2 -2
- diffusers/image_processor.py +1 -2
- diffusers/loaders/__init__.py +17 -2
- diffusers/loaders/ip_adapter.py +10 -7
- diffusers/loaders/lora_base.py +752 -0
- diffusers/loaders/lora_pipeline.py +2252 -0
- diffusers/loaders/peft.py +213 -5
- diffusers/loaders/single_file.py +3 -14
- diffusers/loaders/single_file_model.py +31 -10
- diffusers/loaders/single_file_utils.py +293 -8
- diffusers/loaders/textual_inversion.py +1 -6
- diffusers/loaders/unet.py +23 -208
- diffusers/models/__init__.py +20 -0
- diffusers/models/activations.py +22 -0
- diffusers/models/attention.py +386 -7
- diffusers/models/attention_processor.py +1937 -629
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_kl.py +14 -3
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1271 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +1 -1
- diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +1 -0
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vq_model.py +4 -4
- diffusers/models/controlnet.py +2 -3
- diffusers/models/controlnet_hunyuan.py +401 -0
- diffusers/models/controlnet_sd3.py +11 -11
- diffusers/models/controlnet_sparsectrl.py +789 -0
- diffusers/models/controlnet_xs.py +40 -10
- diffusers/models/downsampling.py +68 -0
- diffusers/models/embeddings.py +403 -36
- diffusers/models/model_loading_utils.py +1 -3
- diffusers/models/modeling_flax_utils.py +1 -6
- diffusers/models/modeling_utils.py +4 -16
- diffusers/models/normalization.py +203 -12
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +543 -0
- diffusers/models/transformers/cogvideox_transformer_3d.py +485 -0
- diffusers/models/transformers/hunyuan_transformer_2d.py +19 -15
- diffusers/models/transformers/latte_transformer_3d.py +327 -0
- diffusers/models/transformers/lumina_nextdit2d.py +340 -0
- diffusers/models/transformers/pixart_transformer_2d.py +102 -1
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/stable_audio_transformer.py +458 -0
- diffusers/models/transformers/transformer_flux.py +455 -0
- diffusers/models/transformers/transformer_sd3.py +18 -4
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_condition.py +8 -1
- diffusers/models/unets/unet_3d_blocks.py +51 -920
- diffusers/models/unets/unet_3d_condition.py +4 -1
- diffusers/models/unets/unet_i2vgen_xl.py +4 -1
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +1330 -84
- diffusers/models/unets/unet_spatio_temporal_condition.py +1 -1
- diffusers/models/unets/unet_stable_cascade.py +1 -3
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +64 -0
- diffusers/models/vq_model.py +8 -4
- diffusers/optimization.py +1 -1
- diffusers/pipelines/__init__.py +100 -3
- diffusers/pipelines/animatediff/__init__.py +4 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +50 -40
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1076 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +17 -27
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1008 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +51 -38
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +1 -0
- diffusers/pipelines/aura_flow/__init__.py +48 -0
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +591 -0
- diffusers/pipelines/auto_pipeline.py +97 -19
- diffusers/pipelines/cogvideo/__init__.py +48 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +746 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +24 -30
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +31 -30
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +24 -153
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +19 -28
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -28
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +29 -32
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
- diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1042 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +35 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +10 -6
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +0 -4
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +2 -2
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -6
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +6 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +3 -3
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
- diffusers/pipelines/flux/__init__.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +749 -0
- diffusers/pipelines/flux/pipeline_output.py +21 -0
- diffusers/pipelines/free_init_utils.py +2 -0
- diffusers/pipelines/free_noise_utils.py +236 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +2 -2
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +2 -2
- diffusers/pipelines/kolors/__init__.py +54 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1247 -0
- diffusers/pipelines/kolors/pipeline_output.py +21 -0
- diffusers/pipelines/kolors/text_encoder.py +889 -0
- diffusers/pipelines/kolors/tokenizer.py +334 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +30 -29
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +23 -29
- diffusers/pipelines/latte/__init__.py +48 -0
- diffusers/pipelines/latte/pipeline_latte.py +881 -0
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +4 -4
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +0 -4
- diffusers/pipelines/lumina/__init__.py +48 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +897 -0
- diffusers/pipelines/pag/__init__.py +67 -0
- diffusers/pipelines/pag/pag_utils.py +237 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1329 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1612 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +953 -0
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +872 -0
- diffusers/pipelines/pag/pipeline_pag_sd.py +1050 -0
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +985 -0
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +862 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1333 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1529 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1753 -0
- diffusers/pipelines/pia/pipeline_pia.py +30 -37
- diffusers/pipelines/pipeline_flax_utils.py +4 -9
- diffusers/pipelines/pipeline_loading_utils.py +0 -3
- diffusers/pipelines/pipeline_utils.py +2 -14
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +0 -1
- diffusers/pipelines/stable_audio/__init__.py +50 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +745 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +2 -0
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +23 -29
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +15 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +30 -29
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +23 -152
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +8 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +8 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +6 -6
- diffusers/pipelines/stable_diffusion_3/__init__.py +2 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +34 -3
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +33 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1201 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +3 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +6 -6
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +5 -5
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +5 -5
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +6 -6
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +0 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +23 -29
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +27 -29
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +3 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +17 -27
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -29
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +17 -145
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +0 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +6 -6
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -28
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +8 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +8 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +6 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +0 -4
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -4
- diffusers/schedulers/__init__.py +8 -0
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
- diffusers/schedulers/scheduling_ddim.py +1 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +449 -0
- diffusers/schedulers/scheduling_ddpm.py +1 -1
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -1
- diffusers/schedulers/scheduling_deis_multistep.py +2 -2
- diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +1 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +1 -1
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +64 -19
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -2
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +63 -39
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +321 -0
- diffusers/schedulers/scheduling_ipndm.py +1 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +1 -1
- diffusers/schedulers/scheduling_utils.py +1 -3
- diffusers/schedulers/scheduling_utils_flax.py +1 -3
- diffusers/training_utils.py +99 -14
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +210 -0
- diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +315 -0
- diffusers/utils/dynamic_modules_utils.py +1 -11
- diffusers/utils/export_utils.py +50 -6
- diffusers/utils/hub_utils.py +45 -42
- diffusers/utils/import_utils.py +37 -15
- diffusers/utils/loading_utils.py +80 -3
- diffusers/utils/testing_utils.py +11 -8
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/METADATA +73 -83
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/RECORD +217 -164
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/WHEEL +1 -1
- diffusers/loaders/autoencoder.py +0 -146
- diffusers/loaders/controlnet.py +0 -136
- diffusers/loaders/lora.py +0 -1728
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/LICENSE +0 -0
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/top_level.txt +0 -0
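Among the new pipeline packages above, `diffusers/pipelines/flux/` is a headline addition in 0.30. A minimal load-and-generate sketch; the checkpoint ID and step count are illustrative assumptions, not part of this diff:

```python
import torch
from diffusers import FluxPipeline  # new in 0.30: diffusers/pipelines/flux/pipeline_flux.py

# Checkpoint ID assumed for illustration.
pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()  # keeps peak VRAM down on consumer GPUs

image = pipe("a tiny astronaut hatching from an egg", num_inference_steps=4).images[0]
image.save("flux_demo.png")
```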
diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py (+18 -28)

```diff
@@ -41,8 +41,6 @@ from ...loaders import (
 from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
 from ...models.attention_processor import (
     AttnProcessor2_0,
-    LoRAAttnProcessor2_0,
-    LoRAXFormersAttnProcessor,
     XFormersAttnProcessor,
 )
 from ...models.lora import adjust_lora_scale_text_encoder
```
```diff
@@ -556,6 +554,9 @@ class StableDiffusionXLControlNetPipeline(
     def prepare_ip_adapter_image_embeds(
         self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
     ):
+        image_embeds = []
+        if do_classifier_free_guidance:
+            negative_image_embeds = []
         if ip_adapter_image_embeds is None:
             if not isinstance(ip_adapter_image, list):
                 ip_adapter_image = [ip_adapter_image]
```
```diff
@@ -565,7 +566,6 @@ class StableDiffusionXLControlNetPipeline(
                     f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
                 )
 
-            image_embeds = []
             for single_ip_adapter_image, image_proj_layer in zip(
                 ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
             ):
```
```diff
@@ -573,36 +573,28 @@ class StableDiffusionXLControlNetPipeline(
                 single_image_embeds, single_negative_image_embeds = self.encode_image(
                     single_ip_adapter_image, device, 1, output_hidden_state
                 )
-                single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
-                single_negative_image_embeds = torch.stack(
-                    [single_negative_image_embeds] * num_images_per_prompt, dim=0
-                )
 
+                image_embeds.append(single_image_embeds[None, :])
                 if do_classifier_free_guidance:
-                    single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
-                    single_image_embeds = single_image_embeds.to(device)
-
-                image_embeds.append(single_image_embeds)
+                    negative_image_embeds.append(single_negative_image_embeds[None, :])
         else:
-            repeat_dims = [1]
-            image_embeds = []
             for single_image_embeds in ip_adapter_image_embeds:
                 if do_classifier_free_guidance:
                     single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
-                    single_image_embeds = single_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                    )
-                    single_negative_image_embeds = single_negative_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
-                    )
-                    single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
-                else:
-                    single_image_embeds = single_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                    )
+                    negative_image_embeds.append(single_negative_image_embeds)
                 image_embeds.append(single_image_embeds)
 
-        return image_embeds
+        ip_adapter_image_embeds = []
+        for i, single_image_embeds in enumerate(image_embeds):
+            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
+            if do_classifier_free_guidance:
+                single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
+                single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
+
+            single_image_embeds = single_image_embeds.to(device=device)
+            ip_adapter_image_embeds.append(single_image_embeds)
+
+        return ip_adapter_image_embeds
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
```
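The refactor above changes `prepare_ip_adapter_image_embeds` to collect per-adapter embeddings first and defer both the `num_images_per_prompt` duplication and the classifier-free-guidance concatenation to one final loop. A toy sketch of the resulting layout, with random tensors standing in for real image-encoder outputs:

```python
import torch

num_images_per_prompt, do_classifier_free_guidance = 2, True

# One IP Adapter's embeddings, batch dim added as in the new code ([None, :]).
image_embeds = [torch.randn(1, 4)]           # conditional
negative_image_embeds = [torch.zeros(1, 4)]  # unconditional

ip_adapter_image_embeds = []
for i, single_image_embeds in enumerate(image_embeds):
    single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
    if do_classifier_free_guidance:
        single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
        single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
    ip_adapter_image_embeds.append(single_image_embeds)

print(ip_adapter_image_embeds[0].shape)  # torch.Size([4, 4]) -> rows ordered [neg, neg, pos, pos]
```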
```diff
@@ -931,8 +923,6 @@ class StableDiffusionXLControlNetPipeline(
             (
                 AttnProcessor2_0,
                 XFormersAttnProcessor,
-                LoRAXFormersAttnProcessor,
-                LoRAAttnProcessor2_0,
             ),
         )
         # if xformers or torch_2_0 is used attention block does not need
```
```diff
@@ -1497,7 +1487,7 @@ class StableDiffusionXLControlNetPipeline(
                 )
 
                 if guess_mode and self.do_classifier_free_guidance:
-                    # Infer ControlNet only for the conditional batch.
+                    # Inferred ControlNet only for the conditional batch.
                     # To apply the output of ControlNet to both the unconditional and conditional batches,
                     # add 0 to the unconditional batch to keep it unchanged.
                     down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples]
```
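The comment fix above sits on an unchanged trick worth spelling out: in guess mode, ControlNet runs only on the conditional half of the CFG batch, and the unconditional half of each residual is padded with zeros so adding it to the UNet features leaves that half untouched. A toy illustration (shapes are assumptions for illustration):

```python
import torch

# Residual produced for the conditional batch only.
d = torch.randn(1, 320, 64, 64)

# Pad the unconditional half with zeros; adding zeros keeps it unchanged.
padded = torch.cat([torch.zeros_like(d), d])
print(padded.shape)           # torch.Size([2, 320, 64, 64])
print(padded[0].abs().max())  # tensor(0.) -> unconditional half untouched
```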
diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py (+29 -32)

```diff
@@ -41,8 +41,6 @@ from ...loaders import (
 from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
 from ...models.attention_processor import (
     AttnProcessor2_0,
-    LoRAAttnProcessor2_0,
-    LoRAXFormersAttnProcessor,
     XFormersAttnProcessor,
 )
 from ...models.lora import adjust_lora_scale_text_encoder
```
```diff
@@ -78,13 +76,13 @@ EXAMPLE_DOC_STRING = """
         >>> import numpy as np
         >>> from PIL import Image
 
-        >>> from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+        >>> from transformers import DPTImageProcessor, DPTForDepthEstimation
         >>> from diffusers import ControlNetModel, StableDiffusionXLControlNetImg2ImgPipeline, AutoencoderKL
         >>> from diffusers.utils import load_image
 
 
         >>> depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to("cuda")
-        >>> feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
+        >>> feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
         >>> controlnet = ControlNetModel.from_pretrained(
         ...     "diffusers/controlnet-depth-sdxl-1.0-small",
         ...     variant="fp16",
```
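Both docstring edits above track a rename in transformers: `DPTFeatureExtractor` is the deprecated alias, `DPTImageProcessor` the current class, and both load the same preprocessor config, so this is a drop-in swap:

```python
# Updated imports as used in the revised docstring.
from transformers import DPTForDepthEstimation, DPTImageProcessor

feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas")
```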
```diff
@@ -550,6 +548,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
     def prepare_ip_adapter_image_embeds(
         self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
     ):
+        image_embeds = []
+        if do_classifier_free_guidance:
+            negative_image_embeds = []
         if ip_adapter_image_embeds is None:
             if not isinstance(ip_adapter_image, list):
                 ip_adapter_image = [ip_adapter_image]
```
```diff
@@ -559,7 +560,6 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                     f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
                 )
 
-            image_embeds = []
             for single_ip_adapter_image, image_proj_layer in zip(
                 ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
             ):
```
```diff
@@ -567,36 +567,28 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                 single_image_embeds, single_negative_image_embeds = self.encode_image(
                     single_ip_adapter_image, device, 1, output_hidden_state
                 )
-                single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
-                single_negative_image_embeds = torch.stack(
-                    [single_negative_image_embeds] * num_images_per_prompt, dim=0
-                )
 
+                image_embeds.append(single_image_embeds[None, :])
                 if do_classifier_free_guidance:
-                    single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
-                    single_image_embeds = single_image_embeds.to(device)
-
-                image_embeds.append(single_image_embeds)
+                    negative_image_embeds.append(single_negative_image_embeds[None, :])
         else:
-            repeat_dims = [1]
-            image_embeds = []
             for single_image_embeds in ip_adapter_image_embeds:
                 if do_classifier_free_guidance:
                     single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
-                    single_image_embeds = single_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                    )
-                    single_negative_image_embeds = single_negative_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
-                    )
-                    single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
-                else:
-                    single_image_embeds = single_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                    )
+                    negative_image_embeds.append(single_negative_image_embeds)
                 image_embeds.append(single_image_embeds)
 
-        return image_embeds
+        ip_adapter_image_embeds = []
+        for i, single_image_embeds in enumerate(image_embeds):
+            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
+            if do_classifier_free_guidance:
+                single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
+                single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
+
+            single_image_embeds = single_image_embeds.to(device=device)
+            ip_adapter_image_embeds.append(single_image_embeds)
+
+        return ip_adapter_image_embeds
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
```
```diff
@@ -938,6 +930,13 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
             )
 
         elif isinstance(generator, list):
+            if image.shape[0] < batch_size and batch_size % image.shape[0] == 0:
+                image = torch.cat([image] * (batch_size // image.shape[0]), dim=0)
+            elif image.shape[0] < batch_size and batch_size % image.shape[0] != 0:
+                raise ValueError(
+                    f"Cannot duplicate `image` of batch size {image.shape[0]} to effective batch_size {batch_size} "
+                )
+
             init_latents = [
                 retrieve_latents(self.vae.encode(image[i : i + 1]), generator=generator[i])
                 for i in range(batch_size)
```
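The new branch duplicates a smaller image batch up to the requested batch size when per-sample generators are used, and rejects sizes that do not divide evenly. The same rule on toy shapes:

```python
import torch

batch_size = 4
image = torch.randn(2, 3, 8, 8)  # 2 source images, 4 latents requested

# Mirror of the divisibility check added to prepare_latents.
if image.shape[0] < batch_size and batch_size % image.shape[0] == 0:
    image = torch.cat([image] * (batch_size // image.shape[0]), dim=0)
elif image.shape[0] < batch_size:
    raise ValueError(f"Cannot duplicate `image` of batch size {image.shape[0]} to {batch_size}")

print(image.shape[0])  # 4: each source image repeated to match batch_size
```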
```diff
@@ -951,8 +950,8 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
 
             init_latents = init_latents.to(dtype)
             if latents_mean is not None and latents_std is not None:
-                latents_mean = latents_mean.to(device=self.device, dtype=dtype)
-                latents_std = latents_std.to(device=self.device, dtype=dtype)
+                latents_mean = latents_mean.to(device=device, dtype=dtype)
+                latents_std = latents_std.to(device=device, dtype=dtype)
                 init_latents = (init_latents - latents_mean) * self.vae.config.scaling_factor / latents_std
             else:
                 init_latents = self.vae.config.scaling_factor * init_latents
```
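Moving the latent statistics to the passed-in `device` rather than the pipeline-level device attribute matters when the execution device differs from where modules currently sit, e.g. under model offloading. A minimal sketch of the normalization those lines feed, with assumed toy statistics:

```python
import torch

device, dtype = "cpu", torch.float32
scaling_factor = 0.13025  # the SDXL VAE scaling factor

init_latents = torch.randn(1, 4, 64, 64, device=device, dtype=dtype)
latents_mean = torch.zeros(1, 4, 1, 1).to(device=device, dtype=dtype)  # target `device`, not `self.device`
latents_std = torch.ones(1, 4, 1, 1).to(device=device, dtype=dtype)

init_latents = (init_latents - latents_mean) * scaling_factor / latents_std
```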
```diff
@@ -1039,8 +1038,6 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
             (
                 AttnProcessor2_0,
                 XFormersAttnProcessor,
-                LoRAXFormersAttnProcessor,
-                LoRAAttnProcessor2_0,
             ),
         )
         # if xformers or torch_2_0 is used attention block does not need
```
```diff
@@ -1554,7 +1551,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                 )
 
                 if guess_mode and self.do_classifier_free_guidance:
-                    # Infer ControlNet only for the conditional batch.
+                    # Inferred ControlNet only for the conditional batch.
                     # To apply the output of ControlNet to both the unconditional and conditional batches,
                     # add 0 to the unconditional batch to keep it unchanged.
                     down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples]
```
diffusers/pipelines/controlnet/pipeline_flax_controlnet.py (+2 -2)

```diff
@@ -23,7 +23,7 @@ from flax.core.frozen_dict import FrozenDict
 from flax.jax_utils import unreplicate
 from flax.training.common_utils import shard
 from PIL import Image
-from transformers import CLIPFeatureExtractor, CLIPTokenizer, FlaxCLIPTextModel
+from transformers import CLIPImageProcessor, CLIPTokenizer, FlaxCLIPTextModel
 
 from ...models import FlaxAutoencoderKL, FlaxControlNetModel, FlaxUNet2DConditionModel
 from ...schedulers import (
```
```diff
@@ -149,7 +149,7 @@ class FlaxStableDiffusionControlNetPipeline(FlaxDiffusionPipeline):
             FlaxDDIMScheduler, FlaxPNDMScheduler, FlaxLMSDiscreteScheduler, FlaxDPMSolverMultistepScheduler
         ],
         safety_checker: FlaxStableDiffusionSafetyChecker,
-        feature_extractor: CLIPFeatureExtractor,
+        feature_extractor: CLIPImageProcessor,
         dtype: jnp.dtype = jnp.float32,
     ):
         super().__init__()
```
diffusers/pipelines/controlnet_hunyuandit/__init__.py (+48 -0, new file)

```diff
@@ -0,0 +1,48 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_hunyuandit_controlnet"] = ["HunyuanDiTControlNetPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_hunyuandit_controlnet import HunyuanDiTControlNetPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
```