diffusers-0.24.0-py3-none-any.whl → diffusers-0.25.0-py3-none-any.whl
- diffusers/__init__.py +11 -1
- diffusers/commands/fp16_safetensors.py +10 -11
- diffusers/configuration_utils.py +12 -8
- diffusers/dependency_versions_table.py +2 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/image_processor.py +286 -46
- diffusers/loaders/ip_adapter.py +11 -9
- diffusers/loaders/lora.py +198 -60
- diffusers/loaders/single_file.py +24 -18
- diffusers/loaders/textual_inversion.py +10 -14
- diffusers/loaders/unet.py +130 -37
- diffusers/models/__init__.py +18 -12
- diffusers/models/activations.py +9 -6
- diffusers/models/attention.py +137 -16
- diffusers/models/attention_processor.py +133 -46
- diffusers/models/autoencoders/__init__.py +5 -0
- diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +4 -4
- diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +45 -6
- diffusers/models/{autoencoder_kl_temporal_decoder.py → autoencoders/autoencoder_kl_temporal_decoder.py} +8 -8
- diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +4 -4
- diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +14 -14
- diffusers/models/{vae.py → autoencoders/vae.py} +9 -5
- diffusers/models/downsampling.py +338 -0
- diffusers/models/embeddings.py +112 -29
- diffusers/models/modeling_flax_utils.py +12 -7
- diffusers/models/modeling_utils.py +10 -10
- diffusers/models/normalization.py +108 -2
- diffusers/models/resnet.py +15 -699
- diffusers/models/transformer_2d.py +2 -2
- diffusers/models/unet_2d_condition.py +37 -0
- diffusers/models/{unet_kandi3.py → unet_kandinsky3.py} +105 -159
- diffusers/models/upsampling.py +454 -0
- diffusers/models/uvit_2d.py +471 -0
- diffusers/models/vq_model.py +9 -2
- diffusers/pipelines/__init__.py +81 -73
- diffusers/pipelines/amused/__init__.py +62 -0
- diffusers/pipelines/amused/pipeline_amused.py +328 -0
- diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +38 -10
- diffusers/pipelines/auto_pipeline.py +17 -13
- diffusers/pipelines/controlnet/pipeline_controlnet.py +27 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +47 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +25 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +4 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +26 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +4 -3
- diffusers/pipelines/deprecated/__init__.py +153 -0
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +91 -18
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +91 -18
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
- diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
- diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
- diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
- diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
- diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
- diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
- diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
- diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +4 -4
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
- diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +16 -11
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +11 -11
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +16 -11
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +10 -10
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +13 -13
- diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
- diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +54 -11
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +6 -6
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +6 -6
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +6 -6
- diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/kandinsky3/__init__.py +4 -4
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
- diffusers/pipelines/kandinsky3/{kandinsky3_pipeline.py → pipeline_kandinsky3.py} +172 -35
- diffusers/pipelines/kandinsky3/{kandinsky3img2img_pipeline.py → pipeline_kandinsky3_img2img.py} +228 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +46 -5
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +47 -6
- diffusers/pipelines/onnx_utils.py +8 -5
- diffusers/pipelines/pipeline_flax_utils.py +7 -6
- diffusers/pipelines/pipeline_utils.py +30 -29
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +51 -2
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/__init__.py +1 -72
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +67 -75
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +92 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +92 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +138 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +57 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +3 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +6 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +5 -2
- diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +2 -3
- diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +2 -2
- diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +3 -3
- diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +6 -1
- diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +50 -7
- diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +56 -8
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
- diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +67 -10
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +97 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +98 -14
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +97 -14
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +7 -5
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +12 -9
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +6 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +5 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +331 -9
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +468 -9
- diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +4 -0
- diffusers/schedulers/__init__.py +2 -0
- diffusers/schedulers/scheduling_amused.py +162 -0
- diffusers/schedulers/scheduling_consistency_models.py +2 -0
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -4
- diffusers/schedulers/scheduling_ddpm.py +46 -0
- diffusers/schedulers/scheduling_ddpm_parallel.py +46 -0
- diffusers/schedulers/scheduling_deis_multistep.py +13 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +13 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +13 -1
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -0
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -1
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -0
- diffusers/schedulers/scheduling_euler_discrete.py +62 -3
- diffusers/schedulers/scheduling_heun_discrete.py +2 -0
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -0
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -0
- diffusers/schedulers/scheduling_lms_discrete.py +2 -0
- diffusers/schedulers/scheduling_unipc_multistep.py +13 -1
- diffusers/schedulers/scheduling_utils.py +3 -1
- diffusers/schedulers/scheduling_utils_flax.py +3 -1
- diffusers/training_utils.py +1 -1
- diffusers/utils/__init__.py +0 -2
- diffusers/utils/constants.py +2 -5
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +45 -0
- diffusers/utils/dynamic_modules_utils.py +14 -18
- diffusers/utils/hub_utils.py +24 -36
- diffusers/utils/logging.py +1 -1
- diffusers/utils/state_dict_utils.py +8 -0
- diffusers/utils/testing_utils.py +199 -1
- diffusers/utils/torch_utils.py +3 -3
- {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/METADATA +54 -53
- {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/RECORD +174 -155
- {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
- {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
- {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
- {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
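
The headline addition in this release is the aMUSEd family (the new `diffusers/pipelines/amused/` modules and `scheduling_amused.py` above); the headline reorganizations are the new `diffusers/models/autoencoders/` subpackage and the move of legacy pipelines under `diffusers/pipelines/deprecated/`. A minimal aMUSEd sketch; the `amused/amused-256` checkpoint name is an assumption, not part of this diff:

    import torch
    from diffusers import AmusedPipeline  # new top-level export in 0.25.0

    # Checkpoint name assumed; any aMUSEd checkpoint should work the same way.
    pipe = AmusedPipeline.from_pretrained("amused/amused-256", torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    image = pipe("a photo of a corgi wearing a party hat", num_inference_steps=12).images[0]
    image.save("corgi.png")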

diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py
RENAMED
@@ -21,17 +21,17 @@ import torch
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from ...configuration_utils import FrozenDict
-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import DDIMScheduler
-from ...utils import PIL_INTERPOLATION, USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
-from .pipeline_output import StableDiffusionPipelineOutput
-from .safety_checker import StableDiffusionSafetyChecker
+from ....configuration_utils import FrozenDict
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import DDIMScheduler
+from ....utils import PIL_INTERPOLATION, USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline
+from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -143,6 +143,11 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
     implemented for all pipelines (downloading, saving, running on a particular device, etc.).
 
+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+
     Args:
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
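
These moves change only internal module paths; the class names and their top-level re-exports stay the same. A minimal sketch of the two import styles after this release (the deep path follows from the rename above; internal paths are not a stable API):

    # Top-level re-export: unchanged between 0.24.0 and 0.25.0
    from diffusers import CycleDiffusionPipeline

    # New internal location implied by the rename above (0.25.0+)
    from diffusers.pipelines.deprecated.stable_diffusion_variants.pipeline_cycle_diffusion import (
        CycleDiffusionPipeline,
    )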

diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py
RENAMED
@@ -6,12 +6,12 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, CLIPTokenizer
 
-from ...configuration_utils import FrozenDict
-from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
-from ...utils import deprecate, logging
-from ..onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel
-from ..pipeline_utils import DiffusionPipeline
-from . import StableDiffusionPipelineOutput
+from ....configuration_utils import FrozenDict
+from ....schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
+from ....utils import deprecate, logging
+from ...onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel
+from ...pipeline_utils import DiffusionPipeline
+from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py
RENAMED
@@ -21,17 +21,17 @@ import torch
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from ...configuration_utils import FrozenDict
-from ...image_processor import VaeImageProcessor
-from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
-from . import StableDiffusionPipelineOutput
-from .safety_checker import StableDiffusionSafetyChecker
+from ....configuration_utils import FrozenDict
+from ....image_processor import VaeImageProcessor
+from ....loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import PIL_INTERPOLATION, USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline
+from ...stable_diffusion import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 logger = logging.get_logger(__name__)

diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py
RENAMED
@@ -18,17 +18,17 @@ from typing import Any, Callable, Dict, List, Optional, Union
 import torch
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
-from ...image_processor import VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import PNDMScheduler
-from ...schedulers.scheduling_utils import SchedulerMixin
-from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
-from . import StableDiffusionPipelineOutput
-from .safety_checker import StableDiffusionSafetyChecker
+from ....image_processor import VaeImageProcessor
+from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import PNDMScheduler
+from ....schedulers.scheduling_utils import SchedulerMixin
+from ....utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline
+from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -43,6 +43,11 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
     implemented for all pipelines (downloading, saving, running on a particular device, etc.).
 
+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+
     Args:
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.

diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py
RENAMED
@@ -18,12 +18,12 @@ from typing import Any, Callable, Dict, List, Optional, Union
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from ...image_processor import VaeImageProcessor
-from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....image_processor import VaeImageProcessor
+from ....loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     logging,
@@ -31,10 +31,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
-from . import StableDiffusionPipelineOutput
-from .safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline
+from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py
RENAMED
@@ -28,14 +28,14 @@ from transformers import (
     CLIPTokenizer,
 )
 
-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention_processor import Attention
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import DDIMScheduler, DDPMScheduler, EulerAncestralDiscreteScheduler, LMSDiscreteScheduler
-from ...schedulers.scheduling_ddim_inverse import DDIMInverseScheduler
-from ...utils import (
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.attention_processor import Attention
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import DDIMScheduler, DDPMScheduler, EulerAncestralDiscreteScheduler, LMSDiscreteScheduler
+from ....schedulers.scheduling_ddim_inverse import DDIMInverseScheduler
+from ....utils import (
     PIL_INTERPOLATION,
     USE_PEFT_BACKEND,
     BaseOutput,
@@ -45,10 +45,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
-from . import StableDiffusionPipelineOutput
-from .safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline
+from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -282,7 +282,7 @@ class Pix2PixZeroAttnProcessor:
 
 class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
     r"""
-    Pipeline for pixel-
+    Pipeline for pixel-level image editing using Pix2Pix Zero. Based on Stable Diffusion.
 
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py
RENAMED
@@ -16,10 +16,10 @@ from typing import List, Optional, Tuple, Union
 
 import torch
 
-from ...models import UNet2DModel
-from ...schedulers import KarrasVeScheduler
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....models import UNet2DModel
+from ....schedulers import KarrasVeScheduler
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 class KarrasVePipeline(DiffusionPipeline):

diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py
RENAMED
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -17,7 +17,7 @@ try:
     if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils.dummy_torch_and_transformers_objects import (
+    from ....utils.dummy_torch_and_transformers_objects import (
         VersatileDiffusionDualGuidedPipeline,
         VersatileDiffusionImageVariationPipeline,
         VersatileDiffusionPipeline,
@@ -45,7 +45,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import (
+        from ....utils.dummy_torch_and_transformers_objects import (
            VersatileDiffusionDualGuidedPipeline,
            VersatileDiffusionImageVariationPipeline,
            VersatileDiffusionPipeline,

diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py
RENAMED
@@ -7,20 +7,20 @@ import torch.nn.functional as F
 
 from diffusers.utils import deprecate
 
-from ...configuration_utils import ConfigMixin, register_to_config
-from ...models import ModelMixin
-from ...models.activations import get_activation
-from ...models.attention_processor import (
+from ....configuration_utils import ConfigMixin, register_to_config
+from ....models import ModelMixin
+from ....models.activations import get_activation
+from ....models.attention_processor import (
     ADDED_KV_ATTENTION_PROCESSORS,
     CROSS_ATTENTION_PROCESSORS,
+    Attention,
     AttentionProcessor,
     AttnAddedKVProcessor,
     AttnAddedKVProcessor2_0,
     AttnProcessor,
 )
-from ...models.dual_transformer_2d import DualTransformer2DModel
-from ...models.embeddings import (
+from ....models.dual_transformer_2d import DualTransformer2DModel
+from ....models.embeddings import (
     GaussianFourierProjection,
     ImageHintTimeEmbedding,
     ImageProjection,
@@ -31,10 +31,10 @@ from ...models.embeddings import (
     TimestepEmbedding,
     Timesteps,
 )
-from ...models.transformer_2d import Transformer2DModel
-from ...models.unet_2d_condition import UNet2DConditionOutput
-from ...utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
-from ...utils.torch_utils import apply_freeu
+from ....models.transformer_2d import Transformer2DModel
+from ....models.unet_2d_condition import UNet2DConditionOutput
+from ....utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
+from ....utils.torch_utils import apply_freeu
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -50,6 +50,9 @@ def get_down_block(
     resnet_eps,
     resnet_act_fn,
     num_attention_heads,
+    transformer_layers_per_block,
+    attention_type,
+    attention_head_dim,
     resnet_groups=None,
     cross_attention_dim=None,
     downsample_padding=None,
@@ -113,6 +116,10 @@ def get_up_block(
     resnet_eps,
     resnet_act_fn,
     num_attention_heads,
+    transformer_layers_per_block,
+    resolution_idx,
+    attention_type,
+    attention_head_dim,
     resnet_groups=None,
     cross_attention_dim=None,
     dual_cross_attention=False,
@@ -993,6 +1000,42 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
                 if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None:
                     setattr(upsample_block, k, None)
 
+    def fuse_qkv_projections(self):
+        """
+        Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+        key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+        """
+        self.original_attn_processors = None
+
+        for _, attn_processor in self.attn_processors.items():
+            if "Added" in str(attn_processor.__class__.__name__):
+                raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")
+
+        self.original_attn_processors = self.attn_processors
+
+        for module in self.modules():
+            if isinstance(module, Attention):
+                module.fuse_projections(fuse=True)
+
+    def unfuse_qkv_projections(self):
+        """Disables the fused QKV projection if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        """
+        if self.original_attn_processors is not None:
+            self.set_attn_processor(self.original_attn_processors)
+
     def forward(
         self,
         sample: torch.FloatTensor,

diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py
RENAMED
@@ -5,10 +5,10 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModel
 
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import logging
-from ..pipeline_utils import DiffusionPipeline
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import logging
+from ...pipeline_utils import DiffusionPipeline
 from .pipeline_versatile_diffusion_dual_guided import VersatileDiffusionDualGuidedPipeline
 from .pipeline_versatile_diffusion_image_variation import VersatileDiffusionImageVariationPipeline
 from .pipeline_versatile_diffusion_text_to_image import VersatileDiffusionTextToImagePipeline

diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py
RENAMED
@@ -26,12 +26,12 @@ from transformers import (
     CLIPVisionModelWithProjection,
 )
 
-from ...image_processor import VaeImageProcessor
-from ...models import AutoencoderKL, DualTransformer2DModel, Transformer2DModel, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import VaeImageProcessor
+from ....models import AutoencoderKL, DualTransformer2DModel, Transformer2DModel, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import deprecate, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .modeling_text_unet import UNetFlatConditionModel
 
 

diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py
RENAMED
@@ -21,12 +21,12 @@ import torch
 import torch.utils.checkpoint
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 
-from ...image_processor import VaeImageProcessor
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import VaeImageProcessor
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import deprecate, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py
RENAMED
@@ -19,12 +19,12 @@ import torch
 import torch.utils.checkpoint
 from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer
 
-from ...image_processor import VaeImageProcessor
-from ...models import AutoencoderKL, Transformer2DModel, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import VaeImageProcessor
+from ....models import AutoencoderKL, Transformer2DModel, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import deprecate, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .modeling_text_unet import UNetFlatConditionModel
 
 

diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py
RENAMED
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -16,7 +16,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils.dummy_torch_and_transformers_objects import (
+    from ....utils.dummy_torch_and_transformers_objects import (
         LearnedClassifierFreeSamplingEmbeddings,
         VQDiffusionPipeline,
     )
@@ -36,7 +36,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
        if not (is_transformers_available() and is_torch_available()):
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import (
+        from ....utils.dummy_torch_and_transformers_objects import (
            LearnedClassifierFreeSamplingEmbeddings,
            VQDiffusionPipeline,
        )

diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py
RENAMED
@@ -17,11 +17,11 @@ from typing import Callable, List, Optional, Tuple, Union
 import torch
 from transformers import CLIPTextModel, CLIPTokenizer
 
-from ...configuration_utils import ConfigMixin, register_to_config
-from ...models import ModelMixin, Transformer2DModel, VQModel
-from ...schedulers import VQDiffusionScheduler
-from ...utils import logging
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....configuration_utils import ConfigMixin, register_to_config
+from ....models import ModelMixin, Transformer2DModel, VQModel
+from ....schedulers import VQDiffusionScheduler
+from ....utils import logging
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

diffusers/pipelines/kandinsky3/__init__.py
@@ -21,8 +21,8 @@ except OptionalDependencyNotAvailable:
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
-    _import_structure["kandinsky3_pipeline"] = ["Kandinsky3Pipeline"]
-    _import_structure["kandinsky3img2img_pipeline"] = ["Kandinsky3Img2ImgPipeline"]
+    _import_structure["pipeline_kandinsky3"] = ["Kandinsky3Pipeline"]
+    _import_structure["pipeline_kandinsky3_img2img"] = ["Kandinsky3Img2ImgPipeline"]
 
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
@@ -33,8 +33,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from ...utils.dummy_torch_and_transformers_objects import *
     else:
-        from .kandinsky3_pipeline import Kandinsky3Pipeline
-        from .kandinsky3img2img_pipeline import Kandinsky3Img2ImgPipeline
+        from .pipeline_kandinsky3 import Kandinsky3Pipeline
+        from .pipeline_kandinsky3_img2img import Kandinsky3Img2ImgPipeline
 else:
     import sys
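
Only the module files are renamed here (see `kandinsky3_pipeline.py → pipeline_kandinsky3.py` in the file list); the public classes keep their names. A quick sketch, assuming the `kandinsky-community/kandinsky-3` Hub checkpoint (not named in this diff):

    import torch
    from diffusers import Kandinsky3Pipeline  # public import path is unchanged

    # Checkpoint name and fp16 variant are assumptions, not part of this diff.
    pipe = Kandinsky3Pipeline.from_pretrained(
        "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
    )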

diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py
ADDED
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+import argparse
+import fnmatch
+
+from safetensors.torch import load_file
+
+from diffusers import Kandinsky3UNet
+
+
+MAPPING = {
+    "to_time_embed.1": "time_embedding.linear_1",
+    "to_time_embed.3": "time_embedding.linear_2",
+    "in_layer": "conv_in",
+    "out_layer.0": "conv_norm_out",
+    "out_layer.2": "conv_out",
+    "down_samples": "down_blocks",
+    "up_samples": "up_blocks",
+    "projection_lin": "encoder_hid_proj.projection_linear",
+    "projection_ln": "encoder_hid_proj.projection_norm",
+    "feature_pooling": "add_time_condition",
+    "to_query": "to_q",
+    "to_key": "to_k",
+    "to_value": "to_v",
+    "output_layer": "to_out.0",
+    "self_attention_block": "attentions.0",
+}
+
+DYNAMIC_MAP = {
+    "resnet_attn_blocks.*.0": "resnets_in.*",
+    "resnet_attn_blocks.*.1": ("attentions.*", 1),
+    "resnet_attn_blocks.*.2": "resnets_out.*",
+}
+# MAPPING = {}
+
+
+def convert_state_dict(unet_state_dict):
+    """
+    Convert the state dict of a U-Net model to match the key format expected by Kandinsky3UNet model.
+    Args:
+        unet_model (torch.nn.Module): The original U-Net model.
+        unet_kandi3_model (torch.nn.Module): The Kandinsky3UNet model to match keys with.
+
+    Returns:
+        OrderedDict: The converted state dictionary.
+    """
+    # Example of renaming logic (this will vary based on your model's architecture)
+    converted_state_dict = {}
+    for key in unet_state_dict:
+        new_key = key
+        for pattern, new_pattern in MAPPING.items():
+            new_key = new_key.replace(pattern, new_pattern)
+
+        for dyn_pattern, dyn_new_pattern in DYNAMIC_MAP.items():
+            has_matched = False
+            if fnmatch.fnmatch(new_key, f"*.{dyn_pattern}.*") and not has_matched:
+                star = int(new_key.split(dyn_pattern.split(".")[0])[-1].split(".")[1])
+
+                if isinstance(dyn_new_pattern, tuple):
+                    new_star = star + dyn_new_pattern[-1]
+                    dyn_new_pattern = dyn_new_pattern[0]
+                else:
+                    new_star = star
+
+                pattern = dyn_pattern.replace("*", str(star))
+                new_pattern = dyn_new_pattern.replace("*", str(new_star))
+
+                new_key = new_key.replace(pattern, new_pattern)
+                has_matched = True
+
+        converted_state_dict[new_key] = unet_state_dict[key]
+
+    return converted_state_dict
+
+
+def main(model_path, output_path):
+    # Load your original U-Net model
+    unet_state_dict = load_file(model_path)
+
+    # Initialize your Kandinsky3UNet model
+    config = {}
+
+    # Convert the state dict
+    converted_state_dict = convert_state_dict(unet_state_dict)
+
+    unet = Kandinsky3UNet(config)
+    unet.load_state_dict(converted_state_dict)
+
+    unet.save_pretrained(output_path)
+    print(f"Converted model saved to {output_path}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert U-Net PyTorch model to Kandinsky3UNet format")
+    parser.add_argument("--model_path", type=str, required=True, help="Path to the original U-Net PyTorch model")
+    parser.add_argument("--output_path", type=str, required=True, help="Path to save the converted model")
+
+    args = parser.parse_args()
+    main(args.model_path, args.output_path)