diffusers 0.29.2__py3-none-any.whl → 0.30.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +94 -3
- diffusers/commands/env.py +1 -5
- diffusers/configuration_utils.py +4 -9
- diffusers/dependency_versions_table.py +2 -2
- diffusers/image_processor.py +1 -2
- diffusers/loaders/__init__.py +17 -2
- diffusers/loaders/ip_adapter.py +10 -7
- diffusers/loaders/lora_base.py +752 -0
- diffusers/loaders/lora_pipeline.py +2252 -0
- diffusers/loaders/peft.py +213 -5
- diffusers/loaders/single_file.py +3 -14
- diffusers/loaders/single_file_model.py +31 -10
- diffusers/loaders/single_file_utils.py +293 -8
- diffusers/loaders/textual_inversion.py +1 -6
- diffusers/loaders/unet.py +23 -208
- diffusers/models/__init__.py +20 -0
- diffusers/models/activations.py +22 -0
- diffusers/models/attention.py +386 -7
- diffusers/models/attention_processor.py +1937 -629
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_kl.py +14 -3
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1271 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +1 -1
- diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +1 -0
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vq_model.py +4 -4
- diffusers/models/controlnet.py +2 -3
- diffusers/models/controlnet_hunyuan.py +401 -0
- diffusers/models/controlnet_sd3.py +11 -11
- diffusers/models/controlnet_sparsectrl.py +789 -0
- diffusers/models/controlnet_xs.py +40 -10
- diffusers/models/downsampling.py +68 -0
- diffusers/models/embeddings.py +403 -36
- diffusers/models/model_loading_utils.py +1 -3
- diffusers/models/modeling_flax_utils.py +1 -6
- diffusers/models/modeling_utils.py +4 -16
- diffusers/models/normalization.py +203 -12
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +543 -0
- diffusers/models/transformers/cogvideox_transformer_3d.py +485 -0
- diffusers/models/transformers/hunyuan_transformer_2d.py +19 -15
- diffusers/models/transformers/latte_transformer_3d.py +327 -0
- diffusers/models/transformers/lumina_nextdit2d.py +340 -0
- diffusers/models/transformers/pixart_transformer_2d.py +102 -1
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/stable_audio_transformer.py +458 -0
- diffusers/models/transformers/transformer_flux.py +455 -0
- diffusers/models/transformers/transformer_sd3.py +18 -4
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_condition.py +8 -1
- diffusers/models/unets/unet_3d_blocks.py +51 -920
- diffusers/models/unets/unet_3d_condition.py +4 -1
- diffusers/models/unets/unet_i2vgen_xl.py +4 -1
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +1330 -84
- diffusers/models/unets/unet_spatio_temporal_condition.py +1 -1
- diffusers/models/unets/unet_stable_cascade.py +1 -3
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +64 -0
- diffusers/models/vq_model.py +8 -4
- diffusers/optimization.py +1 -1
- diffusers/pipelines/__init__.py +100 -3
- diffusers/pipelines/animatediff/__init__.py +4 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +50 -40
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1076 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +17 -27
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1008 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +51 -38
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +1 -0
- diffusers/pipelines/aura_flow/__init__.py +48 -0
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +591 -0
- diffusers/pipelines/auto_pipeline.py +97 -19
- diffusers/pipelines/cogvideo/__init__.py +48 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +746 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +24 -30
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +31 -30
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +24 -153
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +19 -28
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -28
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +29 -32
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
- diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1042 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +35 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +10 -6
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +0 -4
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +2 -2
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -6
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +6 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +3 -3
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
- diffusers/pipelines/flux/__init__.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +749 -0
- diffusers/pipelines/flux/pipeline_output.py +21 -0
- diffusers/pipelines/free_init_utils.py +2 -0
- diffusers/pipelines/free_noise_utils.py +236 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +2 -2
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +2 -2
- diffusers/pipelines/kolors/__init__.py +54 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1247 -0
- diffusers/pipelines/kolors/pipeline_output.py +21 -0
- diffusers/pipelines/kolors/text_encoder.py +889 -0
- diffusers/pipelines/kolors/tokenizer.py +334 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +30 -29
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +23 -29
- diffusers/pipelines/latte/__init__.py +48 -0
- diffusers/pipelines/latte/pipeline_latte.py +881 -0
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +4 -4
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +0 -4
- diffusers/pipelines/lumina/__init__.py +48 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +897 -0
- diffusers/pipelines/pag/__init__.py +67 -0
- diffusers/pipelines/pag/pag_utils.py +237 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1329 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1612 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +953 -0
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +872 -0
- diffusers/pipelines/pag/pipeline_pag_sd.py +1050 -0
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +985 -0
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +862 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1333 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1529 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1753 -0
- diffusers/pipelines/pia/pipeline_pia.py +30 -37
- diffusers/pipelines/pipeline_flax_utils.py +4 -9
- diffusers/pipelines/pipeline_loading_utils.py +0 -3
- diffusers/pipelines/pipeline_utils.py +2 -14
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +0 -1
- diffusers/pipelines/stable_audio/__init__.py +50 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +745 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +2 -0
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +23 -29
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +15 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +30 -29
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +23 -152
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +8 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +8 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +6 -6
- diffusers/pipelines/stable_diffusion_3/__init__.py +2 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +34 -3
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +33 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1201 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +3 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +6 -6
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +5 -5
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +5 -5
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +6 -6
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +0 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +23 -29
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +27 -29
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +3 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +17 -27
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -29
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +17 -145
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +0 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +6 -6
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -28
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +8 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +8 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +6 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +0 -4
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -4
- diffusers/schedulers/__init__.py +8 -0
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
- diffusers/schedulers/scheduling_ddim.py +1 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +449 -0
- diffusers/schedulers/scheduling_ddpm.py +1 -1
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -1
- diffusers/schedulers/scheduling_deis_multistep.py +2 -2
- diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +1 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +1 -1
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +64 -19
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -2
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +63 -39
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +321 -0
- diffusers/schedulers/scheduling_ipndm.py +1 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +1 -1
- diffusers/schedulers/scheduling_utils.py +1 -3
- diffusers/schedulers/scheduling_utils_flax.py +1 -3
- diffusers/training_utils.py +99 -14
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +210 -0
- diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +315 -0
- diffusers/utils/dynamic_modules_utils.py +1 -11
- diffusers/utils/export_utils.py +50 -6
- diffusers/utils/hub_utils.py +45 -42
- diffusers/utils/import_utils.py +37 -15
- diffusers/utils/loading_utils.py +80 -3
- diffusers/utils/testing_utils.py +11 -8
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/METADATA +73 -83
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/RECORD +217 -164
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/WHEEL +1 -1
- diffusers/loaders/autoencoder.py +0 -146
- diffusers/loaders/controlnet.py +0 -136
- diffusers/loaders/lora.py +0 -1728
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/LICENSE +0 -0
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/top_level.txt +0 -0
diffusers/loaders/unet.py
CHANGED
@@ -11,13 +11,11 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
-
import inspect
|
15
14
|
import os
|
16
15
|
from collections import defaultdict
|
17
16
|
from contextlib import nullcontext
|
18
|
-
from functools import partial
|
19
17
|
from pathlib import Path
|
20
|
-
from typing import Callable, Dict, List, Optional, Union
|
18
|
+
from typing import Callable, Dict, Union
|
21
19
|
|
22
20
|
import safetensors
|
23
21
|
import torch
|
@@ -38,18 +36,14 @@ from ..utils import (
|
|
38
36
|
USE_PEFT_BACKEND,
|
39
37
|
_get_model_file,
|
40
38
|
convert_unet_state_dict_to_peft,
|
41
|
-
delete_adapter_layers,
|
42
39
|
get_adapter_name,
|
43
40
|
get_peft_kwargs,
|
44
41
|
is_accelerate_available,
|
45
42
|
is_peft_version,
|
46
43
|
is_torch_version,
|
47
44
|
logging,
|
48
|
-
set_adapter_layers,
|
49
|
-
set_weights_and_activate_adapters,
|
50
45
|
)
|
51
|
-
from .lora import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, TEXT_ENCODER_NAME, UNET_NAME
|
52
|
-
from .unet_loader_utils import _maybe_expand_lora_scales
|
46
|
+
from .lora_pipeline import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, TEXT_ENCODER_NAME, UNET_NAME
|
53
47
|
from .utils import AttnProcsLayers
|
54
48
|
|
55
49
|
|
@@ -97,9 +91,7 @@ class UNet2DConditionLoadersMixin:
|
|
97
91
|
force_download (`bool`, *optional*, defaults to `False`):
|
98
92
|
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
|
99
93
|
cached versions if they exist.
|
100
|
-
|
101
|
-
Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
|
102
|
-
of Diffusers.
|
94
|
+
|
103
95
|
proxies (`Dict[str, str]`, *optional*):
|
104
96
|
A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
|
105
97
|
'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
|
@@ -140,7 +132,6 @@ class UNet2DConditionLoadersMixin:
|
|
140
132
|
"""
|
141
133
|
cache_dir = kwargs.pop("cache_dir", None)
|
142
134
|
force_download = kwargs.pop("force_download", False)
|
143
|
-
resume_download = kwargs.pop("resume_download", None)
|
144
135
|
proxies = kwargs.pop("proxies", None)
|
145
136
|
local_files_only = kwargs.pop("local_files_only", None)
|
146
137
|
token = kwargs.pop("token", None)
|
@@ -174,7 +165,6 @@ class UNet2DConditionLoadersMixin:
|
|
174
165
|
weights_name=weight_name or LORA_WEIGHT_NAME_SAFE,
|
175
166
|
cache_dir=cache_dir,
|
176
167
|
force_download=force_download,
|
177
|
-
resume_download=resume_download,
|
178
168
|
proxies=proxies,
|
179
169
|
local_files_only=local_files_only,
|
180
170
|
token=token,
|
@@ -194,7 +184,6 @@ class UNet2DConditionLoadersMixin:
|
|
194
184
|
weights_name=weight_name or LORA_WEIGHT_NAME,
|
195
185
|
cache_dir=cache_dir,
|
196
186
|
force_download=force_download,
|
197
|
-
resume_download=resume_download,
|
198
187
|
proxies=proxies,
|
199
188
|
local_files_only=local_files_only,
|
200
189
|
token=token,
|
@@ -362,7 +351,7 @@ class UNet2DConditionLoadersMixin:
|
|
362
351
|
return is_model_cpu_offload, is_sequential_cpu_offload
|
363
352
|
|
364
353
|
@classmethod
|
365
|
-
# Copied from diffusers.loaders.
|
354
|
+
# Copied from diffusers.loaders.lora_base.LoraBaseMixin._optionally_disable_offloading
|
366
355
|
def _optionally_disable_offloading(cls, _pipeline):
|
367
356
|
"""
|
368
357
|
Optionally removes offloading in case the pipeline has been already sequentially offloaded to CPU.
|
@@ -457,6 +446,15 @@ class UNet2DConditionLoadersMixin:
|
|
457
446
|
)
|
458
447
|
if is_custom_diffusion:
|
459
448
|
state_dict = self._get_custom_diffusion_state_dict()
|
449
|
+
if save_function is None and safe_serialization:
|
450
|
+
# safetensors does not support saving dicts with non-tensor values
|
451
|
+
empty_state_dict = {k: v for k, v in state_dict.items() if not isinstance(v, torch.Tensor)}
|
452
|
+
if len(empty_state_dict) > 0:
|
453
|
+
logger.warning(
|
454
|
+
f"Safetensors does not support saving dicts with non-tensor values. "
|
455
|
+
f"The following keys will be ignored: {empty_state_dict.keys()}"
|
456
|
+
)
|
457
|
+
state_dict = {k: v for k, v in state_dict.items() if isinstance(v, torch.Tensor)}
|
460
458
|
else:
|
461
459
|
if not USE_PEFT_BACKEND:
|
462
460
|
raise ValueError("PEFT backend is required for saving LoRAs using the `save_attn_procs()` method.")
|
@@ -515,194 +513,6 @@ class UNet2DConditionLoadersMixin:
|
|
515
513
|
|
516
514
|
return state_dict
|
517
515
|
|
518
|
-
def fuse_lora(self, lora_scale=1.0, safe_fusing=False, adapter_names=None):
|
519
|
-
if not USE_PEFT_BACKEND:
|
520
|
-
raise ValueError("PEFT backend is required for `fuse_lora()`.")
|
521
|
-
|
522
|
-
self.lora_scale = lora_scale
|
523
|
-
self._safe_fusing = safe_fusing
|
524
|
-
self.apply(partial(self._fuse_lora_apply, adapter_names=adapter_names))
|
525
|
-
|
526
|
-
def _fuse_lora_apply(self, module, adapter_names=None):
|
527
|
-
from peft.tuners.tuners_utils import BaseTunerLayer
|
528
|
-
|
529
|
-
merge_kwargs = {"safe_merge": self._safe_fusing}
|
530
|
-
|
531
|
-
if isinstance(module, BaseTunerLayer):
|
532
|
-
if self.lora_scale != 1.0:
|
533
|
-
module.scale_layer(self.lora_scale)
|
534
|
-
|
535
|
-
# For BC with prevous PEFT versions, we need to check the signature
|
536
|
-
# of the `merge` method to see if it supports the `adapter_names` argument.
|
537
|
-
supported_merge_kwargs = list(inspect.signature(module.merge).parameters)
|
538
|
-
if "adapter_names" in supported_merge_kwargs:
|
539
|
-
merge_kwargs["adapter_names"] = adapter_names
|
540
|
-
elif "adapter_names" not in supported_merge_kwargs and adapter_names is not None:
|
541
|
-
raise ValueError(
|
542
|
-
"The `adapter_names` argument is not supported with your PEFT version. Please upgrade"
|
543
|
-
" to the latest version of PEFT. `pip install -U peft`"
|
544
|
-
)
|
545
|
-
|
546
|
-
module.merge(**merge_kwargs)
|
547
|
-
|
548
|
-
def unfuse_lora(self):
|
549
|
-
if not USE_PEFT_BACKEND:
|
550
|
-
raise ValueError("PEFT backend is required for `unfuse_lora()`.")
|
551
|
-
self.apply(self._unfuse_lora_apply)
|
552
|
-
|
553
|
-
def _unfuse_lora_apply(self, module):
|
554
|
-
from peft.tuners.tuners_utils import BaseTunerLayer
|
555
|
-
|
556
|
-
if isinstance(module, BaseTunerLayer):
|
557
|
-
module.unmerge()
|
558
|
-
|
559
|
-
def unload_lora(self):
|
560
|
-
if not USE_PEFT_BACKEND:
|
561
|
-
raise ValueError("PEFT backend is required for `unload_lora()`.")
|
562
|
-
|
563
|
-
from ..utils import recurse_remove_peft_layers
|
564
|
-
|
565
|
-
recurse_remove_peft_layers(self)
|
566
|
-
if hasattr(self, "peft_config"):
|
567
|
-
del self.peft_config
|
568
|
-
|
569
|
-
def set_adapters(
|
570
|
-
self,
|
571
|
-
adapter_names: Union[List[str], str],
|
572
|
-
weights: Optional[Union[float, Dict, List[float], List[Dict], List[None]]] = None,
|
573
|
-
):
|
574
|
-
"""
|
575
|
-
Set the currently active adapters for use in the UNet.
|
576
|
-
|
577
|
-
Args:
|
578
|
-
adapter_names (`List[str]` or `str`):
|
579
|
-
The names of the adapters to use.
|
580
|
-
adapter_weights (`Union[List[float], float]`, *optional*):
|
581
|
-
The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the
|
582
|
-
adapters.
|
583
|
-
|
584
|
-
Example:
|
585
|
-
|
586
|
-
```py
|
587
|
-
from diffusers import AutoPipelineForText2Image
|
588
|
-
import torch
|
589
|
-
|
590
|
-
pipeline = AutoPipelineForText2Image.from_pretrained(
|
591
|
-
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
|
592
|
-
).to("cuda")
|
593
|
-
pipeline.load_lora_weights(
|
594
|
-
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
|
595
|
-
)
|
596
|
-
pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
|
597
|
-
pipeline.set_adapters(["cinematic", "pixel"], adapter_weights=[0.5, 0.5])
|
598
|
-
```
|
599
|
-
"""
|
600
|
-
if not USE_PEFT_BACKEND:
|
601
|
-
raise ValueError("PEFT backend is required for `set_adapters()`.")
|
602
|
-
|
603
|
-
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
|
604
|
-
|
605
|
-
# Expand weights into a list, one entry per adapter
|
606
|
-
# examples for e.g. 2 adapters: [{...}, 7] -> [7,7] ; None -> [None, None]
|
607
|
-
if not isinstance(weights, list):
|
608
|
-
weights = [weights] * len(adapter_names)
|
609
|
-
|
610
|
-
if len(adapter_names) != len(weights):
|
611
|
-
raise ValueError(
|
612
|
-
f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
|
613
|
-
)
|
614
|
-
|
615
|
-
# Set None values to default of 1.0
|
616
|
-
# e.g. [{...}, 7] -> [{...}, 7] ; [None, None] -> [1.0, 1.0]
|
617
|
-
weights = [w if w is not None else 1.0 for w in weights]
|
618
|
-
|
619
|
-
# e.g. [{...}, 7] -> [{expanded dict...}, 7]
|
620
|
-
weights = _maybe_expand_lora_scales(self, weights)
|
621
|
-
|
622
|
-
set_weights_and_activate_adapters(self, adapter_names, weights)
|
623
|
-
|
624
|
-
def disable_lora(self):
|
625
|
-
"""
|
626
|
-
Disable the UNet's active LoRA layers.
|
627
|
-
|
628
|
-
Example:
|
629
|
-
|
630
|
-
```py
|
631
|
-
from diffusers import AutoPipelineForText2Image
|
632
|
-
import torch
|
633
|
-
|
634
|
-
pipeline = AutoPipelineForText2Image.from_pretrained(
|
635
|
-
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
|
636
|
-
).to("cuda")
|
637
|
-
pipeline.load_lora_weights(
|
638
|
-
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
|
639
|
-
)
|
640
|
-
pipeline.disable_lora()
|
641
|
-
```
|
642
|
-
"""
|
643
|
-
if not USE_PEFT_BACKEND:
|
644
|
-
raise ValueError("PEFT backend is required for this method.")
|
645
|
-
set_adapter_layers(self, enabled=False)
|
646
|
-
|
647
|
-
def enable_lora(self):
|
648
|
-
"""
|
649
|
-
Enable the UNet's active LoRA layers.
|
650
|
-
|
651
|
-
Example:
|
652
|
-
|
653
|
-
```py
|
654
|
-
from diffusers import AutoPipelineForText2Image
|
655
|
-
import torch
|
656
|
-
|
657
|
-
pipeline = AutoPipelineForText2Image.from_pretrained(
|
658
|
-
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
|
659
|
-
).to("cuda")
|
660
|
-
pipeline.load_lora_weights(
|
661
|
-
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
|
662
|
-
)
|
663
|
-
pipeline.enable_lora()
|
664
|
-
```
|
665
|
-
"""
|
666
|
-
if not USE_PEFT_BACKEND:
|
667
|
-
raise ValueError("PEFT backend is required for this method.")
|
668
|
-
set_adapter_layers(self, enabled=True)
|
669
|
-
|
670
|
-
def delete_adapters(self, adapter_names: Union[List[str], str]):
|
671
|
-
"""
|
672
|
-
Delete an adapter's LoRA layers from the UNet.
|
673
|
-
|
674
|
-
Args:
|
675
|
-
adapter_names (`Union[List[str], str]`):
|
676
|
-
The names (single string or list of strings) of the adapter to delete.
|
677
|
-
|
678
|
-
Example:
|
679
|
-
|
680
|
-
```py
|
681
|
-
from diffusers import AutoPipelineForText2Image
|
682
|
-
import torch
|
683
|
-
|
684
|
-
pipeline = AutoPipelineForText2Image.from_pretrained(
|
685
|
-
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
|
686
|
-
).to("cuda")
|
687
|
-
pipeline.load_lora_weights(
|
688
|
-
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_names="cinematic"
|
689
|
-
)
|
690
|
-
pipeline.delete_adapters("cinematic")
|
691
|
-
```
|
692
|
-
"""
|
693
|
-
if not USE_PEFT_BACKEND:
|
694
|
-
raise ValueError("PEFT backend is required for this method.")
|
695
|
-
|
696
|
-
if isinstance(adapter_names, str):
|
697
|
-
adapter_names = [adapter_names]
|
698
|
-
|
699
|
-
for adapter_name in adapter_names:
|
700
|
-
delete_adapter_layers(self, adapter_name)
|
701
|
-
|
702
|
-
# Pop also the corresponding adapter from the config
|
703
|
-
if hasattr(self, "peft_config"):
|
704
|
-
self.peft_config.pop(adapter_name, None)
|
705
|
-
|
706
516
|
def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=False):
|
707
517
|
if low_cpu_mem_usage:
|
708
518
|
if is_accelerate_available():
|
@@ -922,8 +732,6 @@ class UNet2DConditionLoadersMixin:
|
|
922
732
|
|
923
733
|
def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=False):
|
924
734
|
from ..models.attention_processor import (
|
925
|
-
AttnProcessor,
|
926
|
-
AttnProcessor2_0,
|
927
735
|
IPAdapterAttnProcessor,
|
928
736
|
IPAdapterAttnProcessor2_0,
|
929
737
|
)
|
@@ -963,9 +771,7 @@ class UNet2DConditionLoadersMixin:
|
|
963
771
|
hidden_size = self.config.block_out_channels[block_id]
|
964
772
|
|
965
773
|
if cross_attention_dim is None or "motion_modules" in name:
|
966
|
-
attn_processor_class = (
|
967
|
-
AttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else AttnProcessor
|
968
|
-
)
|
774
|
+
attn_processor_class = self.attn_processors[name].__class__
|
969
775
|
attn_procs[name] = attn_processor_class()
|
970
776
|
|
971
777
|
else:
|
@@ -1017,6 +823,15 @@ class UNet2DConditionLoadersMixin:
|
|
1017
823
|
def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=False):
|
1018
824
|
if not isinstance(state_dicts, list):
|
1019
825
|
state_dicts = [state_dicts]
|
826
|
+
|
827
|
+
# Kolors Unet already has a `encoder_hid_proj`
|
828
|
+
if (
|
829
|
+
self.encoder_hid_proj is not None
|
830
|
+
and self.config.encoder_hid_dim_type == "text_proj"
|
831
|
+
and not hasattr(self, "text_encoder_hid_proj")
|
832
|
+
):
|
833
|
+
self.text_encoder_hid_proj = self.encoder_hid_proj
|
834
|
+
|
1020
835
|
# Set encoder_hid_proj after loading ip_adapter weights,
|
1021
836
|
# because `IPAdapterPlusImageProjection` also has `attn_processors`.
|
1022
837
|
self.encoder_hid_proj = None
|
diffusers/models/__init__.py
CHANGED
@@ -28,22 +28,32 @@ if is_torch_available():
|
|
28
28
|
_import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"]
|
29
29
|
_import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"]
|
30
30
|
_import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"]
|
31
|
+
_import_structure["autoencoders.autoencoder_kl_cogvideox"] = ["AutoencoderKLCogVideoX"]
|
31
32
|
_import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"]
|
33
|
+
_import_structure["autoencoders.autoencoder_oobleck"] = ["AutoencoderOobleck"]
|
32
34
|
_import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"]
|
33
35
|
_import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"]
|
34
36
|
_import_structure["autoencoders.vq_model"] = ["VQModel"]
|
35
37
|
_import_structure["controlnet"] = ["ControlNetModel"]
|
38
|
+
_import_structure["controlnet_hunyuan"] = ["HunyuanDiT2DControlNetModel", "HunyuanDiT2DMultiControlNetModel"]
|
36
39
|
_import_structure["controlnet_sd3"] = ["SD3ControlNetModel", "SD3MultiControlNetModel"]
|
40
|
+
_import_structure["controlnet_sparsectrl"] = ["SparseControlNetModel"]
|
37
41
|
_import_structure["controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"]
|
38
42
|
_import_structure["embeddings"] = ["ImageProjection"]
|
39
43
|
_import_structure["modeling_utils"] = ["ModelMixin"]
|
44
|
+
_import_structure["transformers.auraflow_transformer_2d"] = ["AuraFlowTransformer2DModel"]
|
45
|
+
_import_structure["transformers.cogvideox_transformer_3d"] = ["CogVideoXTransformer3DModel"]
|
40
46
|
_import_structure["transformers.dit_transformer_2d"] = ["DiTTransformer2DModel"]
|
41
47
|
_import_structure["transformers.dual_transformer_2d"] = ["DualTransformer2DModel"]
|
42
48
|
_import_structure["transformers.hunyuan_transformer_2d"] = ["HunyuanDiT2DModel"]
|
49
|
+
_import_structure["transformers.latte_transformer_3d"] = ["LatteTransformer3DModel"]
|
50
|
+
_import_structure["transformers.lumina_nextdit2d"] = ["LuminaNextDiT2DModel"]
|
43
51
|
_import_structure["transformers.pixart_transformer_2d"] = ["PixArtTransformer2DModel"]
|
44
52
|
_import_structure["transformers.prior_transformer"] = ["PriorTransformer"]
|
53
|
+
_import_structure["transformers.stable_audio_transformer"] = ["StableAudioDiTModel"]
|
45
54
|
_import_structure["transformers.t5_film_transformer"] = ["T5FilmDecoder"]
|
46
55
|
_import_structure["transformers.transformer_2d"] = ["Transformer2DModel"]
|
56
|
+
_import_structure["transformers.transformer_flux"] = ["FluxTransformer2DModel"]
|
47
57
|
_import_structure["transformers.transformer_sd3"] = ["SD3Transformer2DModel"]
|
48
58
|
_import_structure["transformers.transformer_temporal"] = ["TransformerTemporalModel"]
|
49
59
|
_import_structure["unets.unet_1d"] = ["UNet1DModel"]
|
@@ -69,23 +79,33 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|
69
79
|
from .autoencoders import (
|
70
80
|
AsymmetricAutoencoderKL,
|
71
81
|
AutoencoderKL,
|
82
|
+
AutoencoderKLCogVideoX,
|
72
83
|
AutoencoderKLTemporalDecoder,
|
84
|
+
AutoencoderOobleck,
|
73
85
|
AutoencoderTiny,
|
74
86
|
ConsistencyDecoderVAE,
|
75
87
|
VQModel,
|
76
88
|
)
|
77
89
|
from .controlnet import ControlNetModel
|
90
|
+
from .controlnet_hunyuan import HunyuanDiT2DControlNetModel, HunyuanDiT2DMultiControlNetModel
|
78
91
|
from .controlnet_sd3 import SD3ControlNetModel, SD3MultiControlNetModel
|
92
|
+
from .controlnet_sparsectrl import SparseControlNetModel
|
79
93
|
from .controlnet_xs import ControlNetXSAdapter, UNetControlNetXSModel
|
80
94
|
from .embeddings import ImageProjection
|
81
95
|
from .modeling_utils import ModelMixin
|
82
96
|
from .transformers import (
|
97
|
+
AuraFlowTransformer2DModel,
|
98
|
+
CogVideoXTransformer3DModel,
|
83
99
|
DiTTransformer2DModel,
|
84
100
|
DualTransformer2DModel,
|
101
|
+
FluxTransformer2DModel,
|
85
102
|
HunyuanDiT2DModel,
|
103
|
+
LatteTransformer3DModel,
|
104
|
+
LuminaNextDiT2DModel,
|
86
105
|
PixArtTransformer2DModel,
|
87
106
|
PriorTransformer,
|
88
107
|
SD3Transformer2DModel,
|
108
|
+
StableAudioDiTModel,
|
89
109
|
T5FilmDecoder,
|
90
110
|
Transformer2DModel,
|
91
111
|
TransformerTemporalModel,
|
diffusers/models/activations.py
CHANGED
@@ -123,6 +123,28 @@ class GEGLU(nn.Module):
|
|
123
123
|
return hidden_states * self.gelu(gate)
|
124
124
|
|
125
125
|
|
126
|
+
class SwiGLU(nn.Module):
|
127
|
+
r"""
|
128
|
+
A [variant](https://arxiv.org/abs/2002.05202) of the gated linear unit activation function. It's similar to `GEGLU`
|
129
|
+
but uses SiLU / Swish instead of GeLU.
|
130
|
+
|
131
|
+
Parameters:
|
132
|
+
dim_in (`int`): The number of channels in the input.
|
133
|
+
dim_out (`int`): The number of channels in the output.
|
134
|
+
bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
|
135
|
+
"""
|
136
|
+
|
137
|
+
def __init__(self, dim_in: int, dim_out: int, bias: bool = True):
|
138
|
+
super().__init__()
|
139
|
+
self.proj = nn.Linear(dim_in, dim_out * 2, bias=bias)
|
140
|
+
self.activation = nn.SiLU()
|
141
|
+
|
142
|
+
def forward(self, hidden_states):
|
143
|
+
hidden_states = self.proj(hidden_states)
|
144
|
+
hidden_states, gate = hidden_states.chunk(2, dim=-1)
|
145
|
+
return hidden_states * self.activation(gate)
|
146
|
+
|
147
|
+
|
126
148
|
class ApproximateGELU(nn.Module):
|
127
149
|
r"""
|
128
150
|
The approximate form of the Gaussian Error Linear Unit (GELU). For more details, see section 2 of this
|