diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
- diffusers/__init__.py +3 -1
- diffusers/commands/fp16_safetensors.py +2 -7
- diffusers/configuration_utils.py +23 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/loaders.py +62 -64
- diffusers/models/__init__.py +1 -0
- diffusers/models/activations.py +2 -0
- diffusers/models/attention.py +45 -1
- diffusers/models/autoencoder_tiny.py +193 -0
- diffusers/models/controlnet.py +1 -1
- diffusers/models/embeddings.py +56 -0
- diffusers/models/lora.py +0 -6
- diffusers/models/modeling_flax_utils.py +28 -2
- diffusers/models/modeling_utils.py +33 -16
- diffusers/models/transformer_2d.py +26 -9
- diffusers/models/unet_1d.py +2 -2
- diffusers/models/unet_2d_blocks.py +106 -56
- diffusers/models/unet_2d_condition.py +20 -5
- diffusers/models/vae.py +106 -1
- diffusers/pipelines/__init__.py +1 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/auto_pipeline.py +33 -43
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/pipeline_flax_utils.py +41 -4
- diffusers/pipelines/pipeline_utils.py +60 -16
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
- diffusers/schedulers/scheduling_consistency_models.py +70 -57
- diffusers/schedulers/scheduling_ddim.py +76 -71
- diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
- diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
- diffusers/schedulers/scheduling_ddpm.py +68 -67
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
- diffusers/schedulers/scheduling_deis_multistep.py +93 -85
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
- diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
- diffusers/schedulers/scheduling_euler_discrete.py +63 -56
- diffusers/schedulers/scheduling_heun_discrete.py +57 -45
- diffusers/schedulers/scheduling_ipndm.py +27 -22
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
- diffusers/schedulers/scheduling_karras_ve.py +55 -45
- diffusers/schedulers/scheduling_lms_discrete.py +58 -52
- diffusers/schedulers/scheduling_pndm.py +77 -62
- diffusers/schedulers/scheduling_repaint.py +56 -38
- diffusers/schedulers/scheduling_sde_ve.py +62 -50
- diffusers/schedulers/scheduling_sde_vp.py +32 -11
- diffusers/schedulers/scheduling_unclip.py +3 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
- diffusers/schedulers/scheduling_utils.py +41 -35
- diffusers/schedulers/scheduling_utils_flax.py +8 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- diffusers/utils/hub_utils.py +105 -2
- diffusers/utils/import_utils.py +0 -4
- diffusers/utils/pil_utils.py +19 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
- diffusers/models/cross_attention.py +0 -94
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/pipeline_flax_utils.py

@@ -23,14 +23,22 @@ import flax
 import numpy as np
 import PIL
 from flax.core.frozen_dict import FrozenDict
-from huggingface_hub import snapshot_download
+from huggingface_hub import create_repo, snapshot_download
 from PIL import Image
 from tqdm.auto import tqdm
 
 from ..configuration_utils import ConfigMixin
 from ..models.modeling_flax_utils import FLAX_WEIGHTS_NAME, FlaxModelMixin
 from ..schedulers.scheduling_utils_flax import SCHEDULER_CONFIG_NAME, FlaxSchedulerMixin
-from ..utils import
+from ..utils import (
+    CONFIG_NAME,
+    DIFFUSERS_CACHE,
+    BaseOutput,
+    PushToHubMixin,
+    http_user_agent,
+    is_transformers_available,
+    logging,
+)
 
 
 if is_transformers_available():
@@ -90,7 +98,7 @@ class FlaxImagePipelineOutput(BaseOutput):
     images: Union[List[PIL.Image.Image], np.ndarray]
 
 
-class FlaxDiffusionPipeline(ConfigMixin):
+class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
     r"""
     Base class for Flax-based pipelines.
 
@@ -139,7 +147,13 @@ class FlaxDiffusionPipeline(ConfigMixin):
             # set models
             setattr(self, name, module)
 
-    def save_pretrained(
+    def save_pretrained(
+        self,
+        save_directory: Union[str, os.PathLike],
+        params: Union[Dict, FrozenDict],
+        push_to_hub: bool = False,
+        **kwargs,
+    ):
         # TODO: handle inference_state
         """
         Save all saveable variables of the pipeline to a directory. A pipeline variable can be saved and loaded if its
@@ -149,6 +163,12 @@ class FlaxDiffusionPipeline(ConfigMixin):
         Arguments:
             save_directory (`str` or `os.PathLike`):
                 Directory to which to save. Will be created if it doesn't exist.
+            push_to_hub (`bool`, *optional*, defaults to `False`):
+                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
+                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
+                namespace).
+            kwargs (`Dict[str, Any]`, *optional*):
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         self.save_config(save_directory)
 
@@ -157,6 +177,14 @@ class FlaxDiffusionPipeline(ConfigMixin):
         model_index_dict.pop("_diffusers_version")
         model_index_dict.pop("_module", None)
 
+        if push_to_hub:
+            commit_message = kwargs.pop("commit_message", None)
+            private = kwargs.pop("private", False)
+            create_pr = kwargs.pop("create_pr", False)
+            token = kwargs.pop("token", None)
+            repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
+            repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id
+
         for pipeline_component_name in model_index_dict.keys():
             sub_model = getattr(self, pipeline_component_name)
             if sub_model is None:
@@ -188,6 +216,15 @@ class FlaxDiffusionPipeline(ConfigMixin):
             else:
                 save_method(os.path.join(save_directory, pipeline_component_name))
 
+        if push_to_hub:
+            self._upload_folder(
+                save_directory,
+                repo_id,
+                token=token,
+                commit_message=commit_message,
+                create_pr=create_pr,
+            )
+
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
         r"""
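The hunks above wire `FlaxDiffusionPipeline.save_pretrained` into the new `PushToHubMixin`. A minimal sketch of how the added `push_to_hub` path could be exercised; the checkpoint id, revision, directory name, and `repo_id` below are placeholders, not part of the diff:

```py
import jax.numpy as jnp
from diffusers import FlaxStableDiffusionPipeline

# Load a Flax pipeline; the bf16 revision is just one commonly used example.
pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", revision="bf16", dtype=jnp.bfloat16
)

# New in 0.20: the saved folder can be pushed to the Hub in the same call.
# repo_id, commit_message, private, create_pr, and token are read from **kwargs.
pipeline.save_pretrained(
    "my-flax-pipeline",
    params=params,
    push_to_hub=True,
    repo_id="your-username/my-flax-pipeline",  # optional; defaults to the directory name
)
```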
diffusers/pipelines/pipeline_utils.py

@@ -28,7 +28,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
 import numpy as np
 import PIL
 import torch
-from huggingface_hub import ModelCard, hf_hub_download, model_info, snapshot_download
+from huggingface_hub import ModelCard, create_repo, hf_hub_download, model_info, snapshot_download
 from packaging import version
 from requests.exceptions import HTTPError
 from tqdm.auto import tqdm
@@ -52,7 +52,6 @@ from ..utils import (
     is_accelerate_available,
     is_accelerate_version,
     is_compiled_module,
-    is_safetensors_available,
     is_torch_version,
     is_transformers_available,
     logging,
@@ -67,7 +66,7 @@ if is_transformers_available():
     from transformers.utils import SAFE_WEIGHTS_NAME as TRANSFORMERS_SAFE_WEIGHTS_NAME
     from transformers.utils import WEIGHTS_NAME as TRANSFORMERS_WEIGHTS_NAME
 
-from ..utils import FLAX_WEIGHTS_NAME, ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME
+from ..utils import FLAX_WEIGHTS_NAME, ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME, PushToHubMixin
 
 
 if is_accelerate_available():
@@ -473,7 +472,7 @@ def load_sub_model(
     return loaded_sub_model
 
 
-class DiffusionPipeline(ConfigMixin):
+class DiffusionPipeline(ConfigMixin, PushToHubMixin):
     r"""
     Base class for all pipelines.
 
@@ -557,8 +556,10 @@ class DiffusionPipeline(ConfigMixin):
     def save_pretrained(
         self,
         save_directory: Union[str, os.PathLike],
-        safe_serialization: bool =
+        safe_serialization: bool = True,
         variant: Optional[str] = None,
+        push_to_hub: bool = False,
+        **kwargs,
     ):
         """
         Save all saveable variables of the pipeline to a directory. A pipeline variable can be saved and loaded if its
@@ -568,10 +569,16 @@ class DiffusionPipeline(ConfigMixin):
         Arguments:
             save_directory (`str` or `os.PathLike`):
                 Directory to save a pipeline to. Will be created if it doesn't exist.
-            safe_serialization (`bool`, *optional*, defaults to `
+            safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
             variant (`str`, *optional*):
                 If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
+            push_to_hub (`bool`, *optional*, defaults to `False`):
+                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
+                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
+                namespace).
+            kwargs (`Dict[str, Any]`, *optional*):
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         model_index_dict = dict(self.config)
         model_index_dict.pop("_class_name", None)
@@ -579,6 +586,14 @@ class DiffusionPipeline(ConfigMixin):
         model_index_dict.pop("_module", None)
         model_index_dict.pop("_name_or_path", None)
 
+        if push_to_hub:
+            commit_message = kwargs.pop("commit_message", None)
+            private = kwargs.pop("private", False)
+            create_pr = kwargs.pop("create_pr", False)
+            token = kwargs.pop("token", None)
+            repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
+            repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id
+
         expected_modules, optional_kwargs = self._get_signature_keys(self)
 
         def is_saveable_module(name, value):
@@ -642,6 +657,15 @@ class DiffusionPipeline(ConfigMixin):
         # finally save the config
         self.save_config(save_directory)
 
+        if push_to_hub:
+            self._upload_folder(
+                save_directory,
+                repo_id,
+                token=token,
+                commit_message=commit_message,
+                create_pr=create_pr,
+            )
+
     def to(
         self,
         torch_device: Optional[Union[str, torch.device]] = None,
@@ -899,7 +923,7 @@ class DiffusionPipeline(ConfigMixin):
         offload_state_dict = kwargs.pop("offload_state_dict", False)
         low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
         variant = kwargs.pop("variant", None)
-        use_safetensors = kwargs.pop("use_safetensors", None
+        use_safetensors = kwargs.pop("use_safetensors", None)
         load_connected_pipeline = kwargs.pop("load_connected_pipeline", False)
 
         # 1. Download the checkpoints and configs
@@ -1311,14 +1335,9 @@ class DiffusionPipeline(ConfigMixin):
         use_onnx = kwargs.pop("use_onnx", None)
         load_connected_pipeline = kwargs.pop("load_connected_pipeline", False)
 
-        if use_safetensors and not is_safetensors_available():
-            raise ValueError(
-                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
-            )
-
         allow_pickle = False
         if use_safetensors is None:
-            use_safetensors =
+            use_safetensors = True
             allow_pickle = True
 
         allow_patterns = None
@@ -1375,7 +1394,7 @@ class DiffusionPipeline(ConfigMixin):
         # if the whole pipeline is cached we don't have to ping the Hub
         if revision in DEPRECATED_REVISION_ARGS and version.parse(
            version.parse(__version__).base_version
-        ) >= version.parse("0.
+        ) >= version.parse("0.22.0"):
             warn_deprecated_model_variant(
                 pretrained_model_name, use_auth_token, variant, revision, model_filenames
             )
@@ -1669,8 +1688,16 @@ class DiffusionPipeline(ConfigMixin):
     def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
         r"""
         Enable sliced attention computation. When this option is enabled, the attention module splits the input tensor
-        in slices to compute attention in several steps.
-        speed decrease.
+        in slices to compute attention in several steps. For more than one attention head, the computation is performed
+        sequentially over each head. This is useful to save some memory in exchange for a small speed decrease.
+
+        <Tip warning={true}>
+
+        ⚠️ Don't enable attention slicing if you're already using `scaled_dot_product_attention` (SDPA) from PyTorch
+        2.0 or xFormers. These attention computations are already very memory efficient so you won't need to enable
+        this function. If you enable attention slicing with SDPA or xFormers, it can lead to serious slow downs!
+
+        </Tip>
 
         Args:
             slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
@@ -1678,6 +1705,23 @@ class DiffusionPipeline(ConfigMixin):
                 `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is
                 provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
                 must be a multiple of `slice_size`.
+
+        Examples:
+
+        ```py
+        >>> import torch
+        >>> from diffusers import StableDiffusionPipeline
+
+        >>> pipe = StableDiffusionPipeline.from_pretrained(
+        ...     "runwayml/stable-diffusion-v1-5",
+        ...     torch_dtype=torch.float16,
+        ...     use_safetensors=True,
+        ... )
+
+        >>> prompt = "a photo of an astronaut riding a horse on mars"
+        >>> pipe.enable_attention_slicing()
+        >>> image = pipe(prompt).images[0]
+        ```
         """
         self.set_attention_slice(slice_size)
 
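For the PyTorch `DiffusionPipeline`, the same release flips `safe_serialization` to `True` and adds the `push_to_hub` path shown above. A small sketch of the resulting workflow; the model id and repository name are placeholders:

```py
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)

# safe_serialization now defaults to True, so weights are written as .safetensors files.
pipe.save_pretrained("my-sd-pipeline")

# Optionally create (or reuse) a Hub repo and upload the saved folder in the same call.
pipe.save_pretrained(
    "my-sd-pipeline",
    push_to_hub=True,
    commit_message="export diffusers pipeline",  # forwarded via **kwargs
)
```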
diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py

@@ -442,7 +442,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline):
         if do_classifier_free_guidance:
             uncond_tokens: List[str]
             if negative_prompt is None:
-                uncond_tokens = [""]
+                uncond_tokens = [""] * batch_size
             elif type(prompt) is not type(negative_prompt):
                 raise TypeError(
                     f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
@@ -471,7 +471,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline):
 
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = uncond_embeddings.shape[1]
-            uncond_embeddings = uncond_embeddings.repeat(
+            uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1)
             uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1)
 
             # For classifier free guidance, we need to do two forward passes.
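The `[""] * batch_size` fix above matters because classifier-free guidance needs one unconditional prompt per batch element; a one-element list produces a mismatched unconditional batch. A tiny standalone illustration (not from the diff):

```py
prompt = ["a cat", "a dog"]           # batch_size == 2
uncond_tokens = [""] * len(prompt)    # one empty (unconditional) prompt per batch element
assert len(uncond_tokens) == len(prompt)
```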
diffusers/pipelines/stable_diffusion/__init__.py

@@ -45,6 +45,7 @@ else:
     from .pipeline_cycle_diffusion import CycleDiffusionPipeline
     from .pipeline_stable_diffusion import StableDiffusionPipeline
     from .pipeline_stable_diffusion_attend_and_excite import StableDiffusionAttendAndExcitePipeline
+    from .pipeline_stable_diffusion_gligen import StableDiffusionGLIGENPipeline
     from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline
     from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline
     from .pipeline_stable_diffusion_inpaint_legacy import StableDiffusionInpaintPipelineLegacy
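With the import above, the GLIGEN pipeline added in this release (`pipeline_stable_diffusion_gligen.py`, +832 lines) becomes part of the public API. A quick sanity check, assuming a torch and transformers install:

```py
# Both import paths should resolve in diffusers 0.20.x.
from diffusers import StableDiffusionGLIGENPipeline
from diffusers.pipelines.stable_diffusion import StableDiffusionGLIGENPipeline as _SameClass

assert StableDiffusionGLIGENPipeline is _SameClass
```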
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py

@@ -50,7 +50,7 @@ from ...schedulers import (
     PNDMScheduler,
     UnCLIPScheduler,
 )
-from ...utils import is_accelerate_available, is_omegaconf_available,
+from ...utils import is_accelerate_available, is_omegaconf_available, logging
 from ...utils.import_utils import BACKENDS_MAPPING
 from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from ..paint_by_example import PaintByExampleImageEncoder
@@ -367,7 +367,7 @@ def create_diffusers_schedular(original_config):
 
 
 def create_ldm_bert_config(original_config):
-    bert_params = original_config.model.
+    bert_params = original_config.model.params.cond_stage_config.params
     config = LDMBertConfig(
         d_model=bert_params.n_embed,
         encoder_layers=bert_params.n_layer,
@@ -778,11 +778,13 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
 def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
     if text_encoder is None:
         config_name = "openai/clip-vit-large-patch14"
-        config = CLIPTextConfig.from_pretrained(config_name)
+        config = CLIPTextConfig.from_pretrained(config_name, local_files_only=local_files_only)
 
         ctx = init_empty_weights if is_accelerate_available() else nullcontext
         with ctx():
             text_model = CLIPTextModel(config)
+    else:
+        text_model = text_encoder
 
     keys = list(checkpoint.keys())
 
@@ -832,8 +834,8 @@ protected = {re.escape(x[0]): x[1] for x in textenc_transformer_conversion_lst}
 textenc_pattern = re.compile("|".join(protected.keys()))
 
 
-def convert_paint_by_example_checkpoint(checkpoint):
-    config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14")
+def convert_paint_by_example_checkpoint(checkpoint, local_files_only=False):
+    config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
     model = PaintByExampleImageEncoder(config)
 
     keys = list(checkpoint.keys())
@@ -900,13 +902,18 @@ def convert_paint_by_example_checkpoint(checkpoint):
 
 
 def convert_open_clip_checkpoint(
-    checkpoint,
+    checkpoint,
+    config_name,
+    prefix="cond_stage_model.model.",
+    has_projection=False,
+    local_files_only=False,
+    **config_kwargs,
 ):
     # text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")
     # text_model = CLIPTextModelWithProjection.from_pretrained(
     #     "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
     # )
-    config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs)
+    config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs, local_files_only=local_files_only)
 
     ctx = init_empty_weights if is_accelerate_available() else nullcontext
     with ctx():
@@ -971,7 +978,7 @@ def convert_open_clip_checkpoint(
     return text_model
 
 
-def stable_unclip_image_encoder(original_config):
+def stable_unclip_image_encoder(original_config, local_files_only=False):
     """
     Returns the image processor and clip image encoder for the img2img unclip pipeline.
 
@@ -989,13 +996,17 @@ def stable_unclip_image_encoder(original_config):
 
         if clip_model_name == "ViT-L/14":
             feature_extractor = CLIPImageProcessor()
-            image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+            image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+                "openai/clip-vit-large-patch14", local_files_only=local_files_only
+            )
         else:
             raise NotImplementedError(f"Unknown CLIP checkpoint name in stable diffusion checkpoint {clip_model_name}")
 
    elif sd_clip_image_embedder_class == "FrozenOpenCLIPImageEmbedder":
        feature_extractor = CLIPImageProcessor()
-        image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+        image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+            "laion/CLIP-ViT-H-14-laion2B-s32B-b79K", local_files_only=local_files_only
+        )
     else:
         raise NotImplementedError(
             f"Unknown CLIP image embedder class in stable diffusion checkpoint {sd_clip_image_embedder_class}"
@@ -1070,7 +1081,9 @@ def convert_controlnet_checkpoint(
     if cross_attention_dim is not None:
         ctrlnet_config["cross_attention_dim"] = cross_attention_dim
 
-
+    ctx = init_empty_weights if is_accelerate_available() else nullcontext
+    with ctx():
+        controlnet = ControlNetModel(**ctrlnet_config)
 
     # Some controlnet ckpt files are distributed independently from the rest of the
     # model components i.e. https://huggingface.co/thibaud/controlnet-sd21/
@@ -1088,7 +1101,11 @@ def convert_controlnet_checkpoint(
         skip_extract_state_dict=skip_extract_state_dict,
     )
 
-
+    if is_accelerate_available():
+        for param_name, param in converted_ctrl_checkpoint.items():
+            set_module_tensor_to_device(controlnet, param_name, "cpu", value=param)
+    else:
+        controlnet.load_state_dict(converted_ctrl_checkpoint)
 
     return controlnet
 
@@ -1116,6 +1133,7 @@ def download_from_original_stable_diffusion_ckpt(
     vae=None,
     text_encoder=None,
     tokenizer=None,
+    config_files=None,
 ) -> DiffusionPipeline:
     """
     Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
@@ -1175,6 +1193,13 @@ def download_from_original_stable_diffusion_ckpt(
            [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer)
            to use. If this parameter is `None`, the function will load a new instance of [CLIPTokenizer] by itself, if
            needed.
+        config_files (`Dict[str, str]`, *optional*, defaults to `None`):
+            A dictionary mapping from config file names to their contents. If this parameter is `None`, the function
+            will load the config files by itself, if needed. Valid keys are:
+                - `v1`: Config file for Stable Diffusion v1
+                - `v2`: Config file for Stable Diffusion v2
+                - `xl`: Config file for Stable Diffusion XL
+                - `xl_refiner`: Config file for Stable Diffusion XL Refiner
     return: A StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file.
     """
 
@@ -1186,7 +1211,6 @@ def download_from_original_stable_diffusion_ckpt(
         StableDiffusionInpaintPipeline,
         StableDiffusionPipeline,
         StableDiffusionXLImg2ImgPipeline,
-        StableDiffusionXLPipeline,
         StableUnCLIPImg2ImgPipeline,
         StableUnCLIPPipeline,
     )
@@ -1203,9 +1227,6 @@ def download_from_original_stable_diffusion_ckpt(
     from omegaconf import OmegaConf
 
     if from_safetensors:
-        if not is_safetensors_available():
-            raise ValueError(BACKENDS_MAPPING["safetensors"][1])
-
         from safetensors.torch import load_file as safe_load
 
         checkpoint = safe_load(checkpoint_path, device="cpu")
@@ -1397,14 +1418,16 @@ def download_from_original_stable_diffusion_ckpt(
         else:
             vae.load_state_dict(converted_vae_checkpoint)
     elif vae is None:
-        vae = AutoencoderKL.from_pretrained(vae_path)
+        vae = AutoencoderKL.from_pretrained(vae_path, local_files_only=local_files_only)
 
     if model_type == "FrozenOpenCLIPEmbedder":
         config_name = "stabilityai/stable-diffusion-2"
         config_kwargs = {"subfolder": "text_encoder"}
 
         text_model = convert_open_clip_checkpoint(checkpoint, config_name, **config_kwargs)
-        tokenizer = CLIPTokenizer.from_pretrained(
+        tokenizer = CLIPTokenizer.from_pretrained(
+            "stabilityai/stable-diffusion-2", subfolder="tokenizer", local_files_only=local_files_only
+        )
 
         if stable_unclip is None:
             if controlnet:
@@ -1456,12 +1479,20 @@ def download_from_original_stable_diffusion_ckpt(
         elif stable_unclip == "txt2img":
             if stable_unclip_prior is None or stable_unclip_prior == "karlo":
                 karlo_model = "kakaobrain/karlo-v1-alpha"
-                prior = PriorTransformer.from_pretrained(
-
-
-
-
-
+                prior = PriorTransformer.from_pretrained(
+                    karlo_model, subfolder="prior", local_files_only=local_files_only
+                )
+
+                prior_tokenizer = CLIPTokenizer.from_pretrained(
+                    "openai/clip-vit-large-patch14", local_files_only=local_files_only
+                )
+                prior_text_model = CLIPTextModelWithProjection.from_pretrained(
+                    "openai/clip-vit-large-patch14", local_files_only=local_files_only
+                )
+
+                prior_scheduler = UnCLIPScheduler.from_pretrained(
+                    karlo_model, subfolder="prior_scheduler", local_files_only=local_files_only
+                )
                 prior_scheduler = DDPMScheduler.from_config(prior_scheduler.config)
             else:
                 raise NotImplementedError(f"unknown prior for stable unclip model: {stable_unclip_prior}")
@@ -1487,8 +1518,10 @@ def download_from_original_stable_diffusion_ckpt(
             raise NotImplementedError(f"unknown `stable_unclip` type: {stable_unclip}")
     elif model_type == "PaintByExample":
         vision_model = convert_paint_by_example_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-        feature_extractor = AutoFeatureExtractor.from_pretrained(
+        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
+        feature_extractor = AutoFeatureExtractor.from_pretrained(
+            "CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only
+        )
         pipe = PaintByExamplePipeline(
             vae=vae,
             image_encoder=vision_model,
@@ -1501,11 +1534,19 @@ def download_from_original_stable_diffusion_ckpt(
         text_model = convert_ldm_clip_checkpoint(
             checkpoint, local_files_only=local_files_only, text_encoder=text_encoder
         )
-        tokenizer =
+        tokenizer = (
+            CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
+            if tokenizer is None
+            else tokenizer
+        )
 
         if load_safety_checker:
-            safety_checker = StableDiffusionSafetyChecker.from_pretrained(
-
+            safety_checker = StableDiffusionSafetyChecker.from_pretrained(
+                "CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only
+            )
+            feature_extractor = AutoFeatureExtractor.from_pretrained(
+                "CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only
+            )
         else:
             safety_checker = None
             feature_extractor = None
@@ -1533,9 +1574,13 @@ def download_from_original_stable_diffusion_ckpt(
         )
     elif model_type in ["SDXL", "SDXL-Refiner"]:
         if model_type == "SDXL":
-            tokenizer = CLIPTokenizer.from_pretrained(
+            tokenizer = CLIPTokenizer.from_pretrained(
+                "openai/clip-vit-large-patch14", local_files_only=local_files_only
+            )
             text_encoder = convert_ldm_clip_checkpoint(checkpoint, local_files_only=local_files_only)
-            tokenizer_2 = CLIPTokenizer.from_pretrained(
+            tokenizer_2 = CLIPTokenizer.from_pretrained(
+                "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only
+            )
 
             config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
             config_kwargs = {"projection_dim": 1280}
@@ -1543,7 +1588,7 @@ def download_from_original_stable_diffusion_ckpt(
                 checkpoint, config_name, prefix="conditioner.embedders.1.model.", has_projection=True, **config_kwargs
             )
 
-            pipe =
+            pipe = pipeline_class(
                 vae=vae,
                 text_encoder=text_encoder,
                 tokenizer=tokenizer,
@@ -1556,7 +1601,9 @@ def download_from_original_stable_diffusion_ckpt(
         else:
             tokenizer = None
             text_encoder = None
-            tokenizer_2 = CLIPTokenizer.from_pretrained(
+            tokenizer_2 = CLIPTokenizer.from_pretrained(
+                "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only
+            )
 
             config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
             config_kwargs = {"projection_dim": 1280}
@@ -1578,7 +1625,7 @@ def download_from_original_stable_diffusion_ckpt(
     else:
         text_config = create_ldm_bert_config(original_config)
         text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
-        tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
+        tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased", local_files_only=local_files_only)
         pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
 
     return pipe
@@ -1602,9 +1649,6 @@ def download_controlnet_from_original_ckpt(
     from omegaconf import OmegaConf
 
     if from_safetensors:
-        if not is_safetensors_available():
-            raise ValueError(BACKENDS_MAPPING["safetensors"][1])
-
         from safetensors import safe_open
 
         checkpoint = {}
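Most of the changes above thread a `local_files_only` flag through every `from_pretrained` call in the conversion path. A sketch of how that is typically used, converting a local checkpoint without touching the Hub; all file paths below are placeholders:

```py
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
    download_from_original_stable_diffusion_ckpt,
)

pipe = download_from_original_stable_diffusion_ckpt(
    checkpoint_path="./v1-5-pruned-emaonly.safetensors",
    original_config_file="./v1-inference.yaml",
    from_safetensors=True,
    local_files_only=True,  # now forwarded to every tokenizer/encoder/VAE load
)
pipe.save_pretrained("./converted-pipeline")
```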
diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py

@@ -346,7 +346,14 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
             )
             prompt_embeds = prompt_embeds[0]
 
-
+        if self.text_encoder is not None:
+            prompt_embeds_dtype = self.text_encoder.dtype
+        elif self.unet is not None:
+            prompt_embeds_dtype = self.unet.dtype
+        else:
+            prompt_embeds_dtype = prompt_embeds.dtype
+
+        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
         bs_embed, seq_len, _ = prompt_embeds.shape
         # duplicate text embeddings for each generation per prompt, using mps friendly method
@@ -402,7 +409,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = negative_prompt_embeds.shape[1]
 
-            negative_prompt_embeds = negative_prompt_embeds.to(dtype=
+            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
@@ -644,7 +651,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
                 every step.
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
-                [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/
+                [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
 
         Example:
 
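The first hunk above adds a dtype-resolution fallback to prompt encoding: prefer the text encoder's dtype, then the UNet's, and only then the embeddings' own dtype, so embeddings are still cast correctly when the text encoder is offloaded or absent. The same change lands in `pipeline_stable_diffusion.py` in the next section. A standalone sketch of that logic; the function name is illustrative, not from the library:

```py
import torch

def resolve_prompt_embeds_dtype(text_encoder, unet, prompt_embeds):
    # Mirrors the fallback chain introduced in the hunks above and below.
    if text_encoder is not None:
        return text_encoder.dtype
    if unet is not None:
        return unet.dtype
    return prompt_embeds.dtype

embeds = torch.randn(2, 77, 768, dtype=torch.float16)
print(resolve_prompt_embeds_dtype(None, None, embeds))  # torch.float16
```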
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py

@@ -336,7 +336,14 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
             )
             prompt_embeds = prompt_embeds[0]
 
-
+        if self.text_encoder is not None:
+            prompt_embeds_dtype = self.text_encoder.dtype
+        elif self.unet is not None:
+            prompt_embeds_dtype = self.unet.dtype
+        else:
+            prompt_embeds_dtype = prompt_embeds.dtype
+
+        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
         bs_embed, seq_len, _ = prompt_embeds.shape
         # duplicate text embeddings for each generation per prompt, using mps friendly method
@@ -392,7 +399,7 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = negative_prompt_embeds.shape[1]
 
-            negative_prompt_embeds = negative_prompt_embeds.to(dtype=
+            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
@@ -585,7 +592,7 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
                 every step.
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
-                [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/
+                [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             guidance_rescale (`float`, *optional*, defaults to 0.7):
                 Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
                 Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when