diffusers 0.18.0__py3-none-any.whl → 0.18.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +1 -1
- diffusers/configuration_utils.py +2 -2
- diffusers/loaders.py +5 -5
- diffusers/models/attention_flax.py +10 -5
- diffusers/models/modeling_utils.py +1 -1
- diffusers/pipelines/pipeline_utils.py +13 -4
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +87 -34
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -7
- diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +72 -40
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +53 -42
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/METADATA +1 -1
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/RECORD +18 -18
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
diffusers/__init__.py
CHANGED
diffusers/configuration_utils.py
CHANGED
@@ -607,7 +607,7 @@ def register_to_config(init):
 
        # Take note of the parameters that were not present in the loaded config
        if len(set(new_kwargs.keys()) - set(init_kwargs)) > 0:
-           new_kwargs["_use_default_values"] = set(new_kwargs.keys()) - set(init_kwargs)
+           new_kwargs["_use_default_values"] = list(set(new_kwargs.keys()) - set(init_kwargs))
 
        new_kwargs = {**config_init_kwargs, **new_kwargs}
        getattr(self, "register_to_config")(**new_kwargs)
@@ -655,7 +655,7 @@ def flax_register_to_config(cls):
 
        # Take note of the parameters that were not present in the loaded config
        if len(set(new_kwargs.keys()) - set(init_kwargs)) > 0:
-           new_kwargs["_use_default_values"] = set(new_kwargs.keys()) - set(init_kwargs)
+           new_kwargs["_use_default_values"] = list(set(new_kwargs.keys()) - set(init_kwargs))
 
        getattr(self, "register_to_config")(**new_kwargs)
        original_init(self, *args, **kwargs)
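The set → list change matters when the config is saved: model configs are serialized as JSON, and Python sets are not JSON-serializable, so keeping `_use_default_values` as a set breaks serialization. A minimal, standard-library illustration:

    import json

    json.dumps({"_use_default_values": ["sample_size"]})  # lists serialize fine
    try:
        json.dumps({"_use_default_values": {"sample_size"}})
    except TypeError as err:
        print(err)  # Object of type set is not JSON serializable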
diffusers/loaders.py
CHANGED
@@ -177,7 +177,7 @@ class UNet2DConditionLoadersMixin:
 
        if use_safetensors and not is_safetensors_available():
            raise ValueError(
-               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
            )
 
        allow_pickle = False
@@ -589,7 +589,7 @@ class TextualInversionLoaderMixin:
 
        if use_safetensors and not is_safetensors_available():
            raise ValueError(
-               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
            )
 
        allow_pickle = False
@@ -806,7 +806,7 @@ class LoraLoaderMixin:
 
        if use_safetensors and not is_safetensors_available():
            raise ValueError(
-               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
            )
 
        allow_pickle = False
@@ -1054,7 +1054,7 @@ class LoraLoaderMixin:
 
        if use_safetensors and not is_safetensors_available():
            raise ValueError(
-               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
            )
 
        allow_pickle = False
@@ -1394,7 +1394,7 @@ class FromSingleFileMixin:
        use_auth_token = kwargs.pop("use_auth_token", None)
        revision = kwargs.pop("revision", None)
        extract_ema = kwargs.pop("extract_ema", False)
-       image_size = kwargs.pop("image_size", 512)
+       image_size = kwargs.pop("image_size", None)
        scheduler_type = kwargs.pop("scheduler_type", "pndm")
        num_in_channels = kwargs.pop("num_in_channels", None)
        upcast_attention = kwargs.pop("upcast_attention", None)
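Downstream (see the convert_from_ckpt changes later in this diff), leaving `image_size` unset lets the single-file loader infer the resolution from the checkpoint type instead of assuming a fixed size; the SDXL branch defaults it to 1024. A hedged usage sketch — the checkpoint URL is illustrative:

    from diffusers import StableDiffusionXLPipeline

    # no image_size argument needed; SDXL checkpoints now default to 1024
    pipe = StableDiffusionXLPipeline.from_single_file(
        "https://huggingface.co/stabilityai/stable-diffusion-xl-base-0.9/blob/main/sd_xl_base_0.9.safetensors"
    )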
diffusers/models/attention_flax.py
CHANGED
@@ -152,6 +152,7 @@ class FlaxAttention(nn.Module):
        self.value = nn.Dense(inner_dim, use_bias=False, dtype=self.dtype, name="to_v")
 
        self.proj_attn = nn.Dense(self.query_dim, dtype=self.dtype, name="to_out_0")
+       self.dropout_layer = nn.Dropout(rate=self.dropout)
 
    def reshape_heads_to_batch_dim(self, tensor):
        batch_size, seq_len, dim = tensor.shape
@@ -214,7 +215,7 @@ class FlaxAttention(nn.Module):
 
        hidden_states = self.reshape_batch_dim_to_heads(hidden_states)
        hidden_states = self.proj_attn(hidden_states)
-       return hidden_states
+       return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
 class FlaxBasicTransformerBlock(nn.Module):
@@ -260,6 +261,7 @@ class FlaxBasicTransformerBlock(nn.Module):
        self.norm1 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
        self.norm2 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
        self.norm3 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
+       self.dropout_layer = nn.Dropout(rate=self.dropout)
 
    def __call__(self, hidden_states, context, deterministic=True):
        # self attention
@@ -280,7 +282,7 @@ class FlaxBasicTransformerBlock(nn.Module):
        hidden_states = self.ff(self.norm3(hidden_states), deterministic=deterministic)
        hidden_states = hidden_states + residual
 
-       return hidden_states
+       return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
 class FlaxTransformer2DModel(nn.Module):
@@ -356,6 +358,8 @@ class FlaxTransformer2DModel(nn.Module):
            dtype=self.dtype,
        )
 
+       self.dropout_layer = nn.Dropout(rate=self.dropout)
+
    def __call__(self, hidden_states, context, deterministic=True):
        batch, height, width, channels = hidden_states.shape
        residual = hidden_states
@@ -378,7 +382,7 @@ class FlaxTransformer2DModel(nn.Module):
        hidden_states = self.proj_out(hidden_states)
 
        hidden_states = hidden_states + residual
-       return hidden_states
+       return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
 class FlaxFeedForward(nn.Module):
@@ -409,7 +413,7 @@ class FlaxFeedForward(nn.Module):
        self.net_2 = nn.Dense(self.dim, dtype=self.dtype)
 
    def __call__(self, hidden_states, deterministic=True):
-       hidden_states = self.net_0(hidden_states)
+       hidden_states = self.net_0(hidden_states, deterministic=deterministic)
        hidden_states = self.net_2(hidden_states)
        return hidden_states
 
@@ -434,8 +438,9 @@ class FlaxGEGLU(nn.Module):
    def setup(self):
        inner_dim = self.dim * 4
        self.proj = nn.Dense(inner_dim * 2, dtype=self.dtype)
+       self.dropout_layer = nn.Dropout(rate=self.dropout)
 
    def __call__(self, hidden_states, deterministic=True):
        hidden_states = self.proj(hidden_states)
        hidden_linear, hidden_gelu = jnp.split(hidden_states, 2, axis=2)
-       return hidden_linear * nn.gelu(hidden_gelu)
+       return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic)
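All of these `nn.Dropout` layers are inert at inference time: Flax dropout is a no-op when `deterministic=True` (the default in these `__call__` signatures), and only with `deterministic=False` does it need a "dropout" PRNG key. A minimal self-contained sketch of that contract (the module name is a stand-in, not diffusers code):

    import jax
    import jax.numpy as jnp
    import flax.linen as nn

    class Block(nn.Module):
        @nn.compact
        def __call__(self, x, deterministic=True):
            x = nn.Dense(16)(x)
            return nn.Dropout(rate=0.1)(x, deterministic=deterministic)

    x = jnp.ones((2, 8))
    params = Block().init(jax.random.PRNGKey(0), x)
    y_eval = Block().apply(params, x)  # deterministic=True: dropout disabled
    y_train = Block().apply(
        params, x, deterministic=False, rngs={"dropout": jax.random.PRNGKey(1)}
    )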
diffusers/models/modeling_utils.py
CHANGED
@@ -456,7 +456,7 @@ class ModelMixin(torch.nn.Module):
 
        if use_safetensors and not is_safetensors_available():
            raise ValueError(
-               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
            )
 
        allow_pickle = False
diffusers/pipelines/pipeline_utils.py
CHANGED
@@ -204,7 +204,7 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLike], str]:
    transformers_index_format = r"\d{5}-of-\d{5}"
 
    if variant is not None:
-       # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.
+       # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.safetensors`
        variant_file_re = re.compile(
            rf"({'|'.join(weight_prefixes)})\.({variant}|{variant}-{transformers_index_format})\.({'|'.join(weight_suffixs)})$"
        )
@@ -213,7 +213,7 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLike], str]:
        rf"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.index\.{variant}\.json$"
    )
 
-   # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.
+   # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetensors`
    non_variant_file_re = re.compile(
        rf"({'|'.join(weight_prefixes)})(-{transformers_index_format})?\.({'|'.join(weight_suffixs)})$"
    )
@@ -1168,7 +1168,7 @@ class DiffusionPipeline(ConfigMixin):
 
        if use_safetensors and not is_safetensors_available():
            raise ValueError(
-               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+               "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
            )
 
        allow_pickle = False
@@ -1213,6 +1213,15 @@ class DiffusionPipeline(ConfigMixin):
        filenames = {sibling.rfilename for sibling in info.siblings}
        model_filenames, variant_filenames = variant_compatible_siblings(filenames, variant=variant)
 
+       if len(variant_filenames) == 0 and variant is not None:
+           deprecation_message = (
+               f"You are trying to load the model files of the `variant={variant}`, but no such modeling files are available."
+               f"The default model files: {model_filenames} will be loaded instead. Make sure to not load from `variant={variant}`"
+               "if such variant modeling files are not available. Doing so will lead to an error in v0.22.0 as defaulting to non-variant"
+               "modeling files is deprecated."
+           )
+           deprecate("no variant default", "0.22.0", deprecation_message, standard_warn=False)
+
        # remove ignored filenames
        model_filenames = set(model_filenames) - set(ignore_filenames)
        variant_filenames = set(variant_filenames) - set(ignore_filenames)
@@ -1302,7 +1311,7 @@ class DiffusionPipeline(ConfigMixin):
        snapshot_folder = Path(config_file).parent
        pipeline_is_cached = all((snapshot_folder / f).is_file() for f in expected_files)
 
-       if pipeline_is_cached:
+       if pipeline_is_cached and not force_download:
            # if the pipeline is cached, we can directly return it
            # else call snapshot_download
            return snapshot_folder
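The `and not force_download` guard closes a small bug visible in the removed line: once a pipeline snapshot was fully cached, the cached folder was returned before `snapshot_download` ever ran, so `force_download=True` was silently ignored. A hedged usage sketch:

    from diffusers import DiffusionPipeline

    # with the fix, this re-fetches the files even if a complete local snapshot exists
    pipe = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", force_download=True
    )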
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py
CHANGED
@@ -24,6 +24,7 @@ from transformers import (
    AutoFeatureExtractor,
    BertTokenizerFast,
    CLIPImageProcessor,
+   CLIPTextConfig,
    CLIPTextModel,
    CLIPTextModelWithProjection,
    CLIPTokenizer,
@@ -48,7 +49,7 @@ from ...schedulers import (
    PNDMScheduler,
    UnCLIPScheduler,
 )
-from ...utils import is_omegaconf_available, is_safetensors_available, logging
+from ...utils import is_accelerate_available, is_omegaconf_available, is_safetensors_available, logging
 from ...utils.import_utils import BACKENDS_MAPPING
 from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from ..paint_by_example import PaintByExampleImageEncoder
@@ -57,6 +58,10 @@ from .safety_checker import StableDiffusionSafetyChecker
 from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
 
 
+if is_accelerate_available():
+   from accelerate import init_empty_weights
+   from accelerate.utils import set_module_tensor_to_device
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -391,8 +396,8 @@ def convert_ldm_unet_checkpoint(
 
    # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
    if sum(k.startswith("model_ema") for k in keys) > 100 and extract_ema:
-       print(f"Checkpoint {path} has both EMA and non-EMA weights.")
-       print(
+       logger.warning(f"Checkpoint {path} has both EMA and non-EMA weights.")
+       logger.warning(
            "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
            " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
        )
@@ -402,7 +407,7 @@ def convert_ldm_unet_checkpoint(
            unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key)
    else:
        if sum(k.startswith("model_ema") for k in keys) > 100:
-           print(
+           logger.warning(
                "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
                " weights (usually better for inference), please make sure to add the `--extract_ema` flag."
            )
@@ -770,11 +775,12 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
 
 
 def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
-   text_model = (
-       CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
-       if text_encoder is None
-       else text_encoder
-   )
+   if text_encoder is None:
+       config_name = "openai/clip-vit-large-patch14"
+       config = CLIPTextConfig.from_pretrained(config_name)
+
+       with init_empty_weights():
+           text_model = CLIPTextModel(config)
 
    keys = list(checkpoint.keys())
 
@@ -787,7 +793,8 @@ def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
        if key.startswith(prefix):
            text_model_dict[key[len(prefix + ".") :]] = checkpoint[key]
 
-   text_model.load_state_dict(text_model_dict)
+   for param_name, param in text_model_dict.items():
+       set_module_tensor_to_device(text_model, param_name, "cpu", value=param)
 
    return text_model
 
@@ -884,14 +891,26 @@ def convert_paint_by_example_checkpoint(checkpoint):
    return model
 
 
-def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
+def convert_open_clip_checkpoint(
+   checkpoint, config_name, prefix="cond_stage_model.model.", has_projection=False, **config_kwargs
+):
    # text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")
-   text_model = CLIPTextModelWithProjection.from_pretrained(
-       "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
-   )
+   # text_model = CLIPTextModelWithProjection.from_pretrained(
+   #     "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
+   # )
+   config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs)
+
+   with init_empty_weights():
+       text_model = CLIPTextModelWithProjection(config) if has_projection else CLIPTextModel(config)
 
    keys = list(checkpoint.keys())
 
+   keys_to_ignore = []
+   if config_name == "stabilityai/stable-diffusion-2" and config.num_hidden_layers == 23:
+       # make sure to remove all keys > 22
+       keys_to_ignore += [k for k in keys if k.startswith("cond_stage_model.model.transformer.resblocks.23")]
+       keys_to_ignore += ["cond_stage_model.model.text_projection"]
+
    text_model_dict = {}
 
    if prefix + "text_projection" in checkpoint:
@@ -902,8 +921,8 @@ def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
    text_model_dict["text_model.embeddings.position_ids"] = text_model.text_model.embeddings.get_buffer("position_ids")
 
    for key in keys:
-
-
+       if key in keys_to_ignore:
+           continue
        if key[len(prefix) :] in textenc_conversion_map:
            if key.endswith("text_projection"):
                value = checkpoint[key].T
@@ -931,7 +950,8 @@ def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
 
        text_model_dict[new_key] = checkpoint[key]
 
-   text_model.load_state_dict(text_model_dict)
+   for param_name, param in text_model_dict.items():
+       set_module_tensor_to_device(text_model, param_name, "cpu", value=param)
 
    return text_model
 
@@ -1061,7 +1081,7 @@ def convert_controlnet_checkpoint(
 def download_from_original_stable_diffusion_ckpt(
    checkpoint_path: str,
    original_config_file: str = None,
-   image_size: int = 512,
+   image_size: Optional[int] = None,
    prediction_type: str = None,
    model_type: str = None,
    extract_ema: bool = False,
@@ -1144,6 +1164,7 @@ def download_from_original_stable_diffusion_ckpt(
        LDMTextToImagePipeline,
        PaintByExamplePipeline,
        StableDiffusionControlNetPipeline,
+       StableDiffusionInpaintPipeline,
        StableDiffusionPipeline,
        StableDiffusionXLImg2ImgPipeline,
        StableDiffusionXLPipeline,
@@ -1166,12 +1187,9 @@ def download_from_original_stable_diffusion_ckpt(
        if not is_safetensors_available():
            raise ValueError(BACKENDS_MAPPING["safetensors"][1])
 
-       from safetensors import safe_open
+       from safetensors.torch import load_file as safe_load
 
-       checkpoint = {}
-       with safe_open(checkpoint_path, framework="pt", device="cpu") as f:
-           for key in f.keys():
-               checkpoint[key] = f.get_tensor(key)
+       checkpoint = safe_load(checkpoint_path, device="cpu")
    else:
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -1183,7 +1201,7 @@ def download_from_original_stable_diffusion_ckpt(
    if "global_step" in checkpoint:
        global_step = checkpoint["global_step"]
    else:
-       print("global_step key not found in model")
+       logger.debug("global_step key not found in model")
        global_step = None
 
    # NOTE: this while loop isn't great but this controlnet checkpoint has one additional
@@ -1230,8 +1248,15 @@ def download_from_original_stable_diffusion_ckpt(
            model_type = "SDXL"
        else:
            model_type = "SDXL-Refiner"
+       if image_size is None:
+           image_size = 1024
 
-   if num_in_channels is not None:
+   if num_in_channels is None and pipeline_class == StableDiffusionInpaintPipeline:
+       num_in_channels = 9
+   elif num_in_channels is None:
+       num_in_channels = 4
+
+   if "unet_config" in original_config.model.params:
        original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
 
    if (
@@ -1263,7 +1288,6 @@ def download_from_original_stable_diffusion_ckpt(
    num_train_timesteps = getattr(original_config.model.params, "timesteps", None) or 1000
 
    if model_type in ["SDXL", "SDXL-Refiner"]:
-       image_size = 1024
        scheduler_dict = {
            "beta_schedule": "scaled_linear",
            "beta_start": 0.00085,
@@ -1279,7 +1303,6 @@ def download_from_original_stable_diffusion_ckpt(
        }
        scheduler = EulerDiscreteScheduler.from_config(scheduler_dict)
        scheduler_type = "euler"
-       vae_path = "stabilityai/sdxl-vae"
    else:
        beta_start = getattr(original_config.model.params, "linear_start", None) or 0.02
        beta_end = getattr(original_config.model.params, "linear_end", None) or 0.085
@@ -1318,25 +1341,45 @@ def download_from_original_stable_diffusion_ckpt(
    # Convert the UNet2DConditionModel model.
    unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
    unet_config["upcast_attention"] = upcast_attention
-   unet = UNet2DConditionModel(**unet_config)
+   with init_empty_weights():
+       unet = UNet2DConditionModel(**unet_config)
 
    converted_unet_checkpoint = convert_ldm_unet_checkpoint(
        checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
    )
-   unet.load_state_dict(converted_unet_checkpoint)
+
+   for param_name, param in converted_unet_checkpoint.items():
+       set_module_tensor_to_device(unet, param_name, "cpu", value=param)
 
    # Convert the VAE model.
    if vae_path is None:
        vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
        converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
 
-       vae = AutoencoderKL(**vae_config)
-       vae.load_state_dict(converted_vae_checkpoint)
+       if (
+           "model" in original_config
+           and "params" in original_config.model
+           and "scale_factor" in original_config.model.params
+       ):
+           vae_scaling_factor = original_config.model.params.scale_factor
+       else:
+           vae_scaling_factor = 0.18215  # default SD scaling factor
+
+       vae_config["scaling_factor"] = vae_scaling_factor
+
+       with init_empty_weights():
+           vae = AutoencoderKL(**vae_config)
+
+       for param_name, param in converted_vae_checkpoint.items():
+           set_module_tensor_to_device(vae, param_name, "cpu", value=param)
    else:
        vae = AutoencoderKL.from_pretrained(vae_path)
 
    if model_type == "FrozenOpenCLIPEmbedder":
-       text_model = convert_open_clip_checkpoint(checkpoint)
+       config_name = "stabilityai/stable-diffusion-2"
+       config_kwargs = {"subfolder": "text_encoder"}
+
+       text_model = convert_open_clip_checkpoint(checkpoint, config_name, **config_kwargs)
        tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer")
 
    if stable_unclip is None:
@@ -1469,7 +1512,12 @@ def download_from_original_stable_diffusion_ckpt(
        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
        text_encoder = convert_ldm_clip_checkpoint(checkpoint, local_files_only=local_files_only)
        tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
-       text_encoder_2 = convert_open_clip_checkpoint(checkpoint, prefix="conditioner.embedders.1.model.")
+
+       config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+       config_kwargs = {"projection_dim": 1280}
+       text_encoder_2 = convert_open_clip_checkpoint(
+           checkpoint, config_name, prefix="conditioner.embedders.1.model.", has_projection=True, **config_kwargs
+       )
 
        pipe = StableDiffusionXLPipeline(
            vae=vae,
@@ -1485,7 +1533,12 @@ def download_from_original_stable_diffusion_ckpt(
        tokenizer = None
        text_encoder = None
        tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
-       text_encoder_2 = convert_open_clip_checkpoint(checkpoint, prefix="conditioner.embedders.0.model.")
+
+       config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+       config_kwargs = {"projection_dim": 1280}
+       text_encoder_2 = convert_open_clip_checkpoint(
+           checkpoint, config_name, prefix="conditioner.embedders.0.model.", has_projection=True, **config_kwargs
+       )
 
        pipe = StableDiffusionXLImg2ImgPipeline(
            vae=vae,
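The recurring pattern in this file — construct the model under `init_empty_weights()` and then materialize parameters one-by-one with `set_module_tensor_to_device` — avoids allocating a randomly-initialized copy of the weights before the checkpoint is loaded, which can roughly halve peak memory during conversion. A self-contained sketch of the same pattern on a toy module (assumes accelerate is installed; the state dict is a stand-in for a converted checkpoint):

    import torch
    from accelerate import init_empty_weights
    from accelerate.utils import set_module_tensor_to_device

    with init_empty_weights():
        model = torch.nn.Linear(4, 4)  # parameters live on the "meta" device: no memory allocated

    state_dict = {"weight": torch.randn(4, 4), "bias": torch.zeros(4)}
    for name, tensor in state_dict.items():
        # materializes each parameter on CPU directly from the checkpoint tensor
        set_module_tensor_to_device(model, name, "cpu", value=tensor)

Note also the simpler safetensors read above: `safetensors.torch.load_file(path, device="cpu")` returns the same tensor dict the removed `safe_open` loop built by hand.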
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
CHANGED
@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ...configuration_utils import FrozenDict
 from ...image_processor import VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor
@@ -153,7 +153,9 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image: bool = False):
    return mask, masked_image
 
 
-class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionInpaintPipeline(
+   DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+):
    r"""
    Pipeline for text-guided image inpainting using Stable Diffusion.
 
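With `FromSingleFileMixin` mixed in (and the `num_in_channels = 9` default added in convert_from_ckpt above), the inpainting pipeline can be loaded directly from an original-layout checkpoint. A hedged sketch — the checkpoint URL is illustrative:

    from diffusers import StableDiffusionInpaintPipeline

    pipe = StableDiffusionInpaintPipeline.from_single_file(
        "https://huggingface.co/runwayml/stable-diffusion-inpainting/blob/main/sd-v1-5-inpainting.ckpt"
    )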
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
CHANGED
@@ -748,15 +748,19 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMixin):
        # make sure the VAE is in float32 mode, as it overflows in float16
        self.vae.to(dtype=torch.float32)
 
-       use_torch_2_0_or_xformers =
-
-
-
-
-
+       use_torch_2_0_or_xformers = isinstance(
+           self.vae.decoder.mid_block.attentions[0].processor,
+           (
+               AttnProcessor2_0,
+               XFormersAttnProcessor,
+               LoRAXFormersAttnProcessor,
+               LoRAAttnProcessor2_0,
+           ),
+       )
+
        # if xformers or torch_2_0 is used attention block does not need
        # to be in float32 which can save lots of memory
-       if
+       if use_torch_2_0_or_xformers:
            self.vae.post_quant_conv.to(latents.dtype)
            self.vae.decoder.conv_in.to(latents.dtype)
            self.vae.decoder.mid_block.to(latents.dtype)
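For reference, passing a tuple of classes to `isinstance` tests the processor object against any of several types, including subclasses; comparing an instance against the classes themselves (e.g. with `in` on a list of classes) would never match. A tiny illustration with stand-in classes, not the real diffusers processors:

    class AttnProcessor2_0: ...
    class XFormersAttnProcessor: ...

    proc = AttnProcessor2_0()
    print(proc in [AttnProcessor2_0, XFormersAttnProcessor])            # False: instance vs. classes
    print(isinstance(proc, (AttnProcessor2_0, XFormersAttnProcessor)))  # True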
diffusers/pipelines/stable_diffusion_xl/__init__.py
CHANGED
@@ -8,7 +8,6 @@ from ...utils import BaseOutput, is_invisible_watermark_available, is_torch_available
 
 
 @dataclass
-# Copied from diffusers.pipelines.stable_diffusion.__init__.StableDiffusionPipelineOutput with StableDiffusion->StableDiffusionXL
 class StableDiffusionXLPipelineOutput(BaseOutput):
    """
    Output class for Stable Diffusion pipelines.
@@ -17,13 +16,9 @@ class StableDiffusionXLPipelineOutput(BaseOutput):
        images (`List[PIL.Image.Image]` or `np.ndarray`)
            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
-       nsfw_content_detected (`List[bool]`)
-           List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
-           (nsfw) content, or `None` if safety checking could not be performed.
    """
 
    images: Union[List[PIL.Image.Image], np.ndarray]
-   nsfw_content_detected: Optional[List[bool]]
 
 
 if is_transformers_available() and is_torch_available() and is_invisible_watermark_available():
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
CHANGED
@@ -129,9 +129,11 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+       self.default_sample_size = self.unet.config.sample_size
 
        self.watermark = StableDiffusionXLWatermarker()
 
+   # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
    def enable_vae_slicing(self):
        r"""
        Enable sliced VAE decoding.
@@ -141,6 +143,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        """
        self.vae.enable_slicing()
 
+   # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
    def disable_vae_slicing(self):
        r"""
        Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
@@ -148,6 +151,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        """
        self.vae.disable_slicing()
 
+   # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
    def enable_vae_tiling(self):
        r"""
        Enable tiled VAE decoding.
@@ -157,6 +161,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        """
        self.vae.enable_tiling()
 
+   # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
    def disable_vae_tiling(self):
        r"""
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
@@ -183,7 +188,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        self.to("cpu", silence_dtype_warnings=True)
        torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
 
-       for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
+       for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
            cpu_offload(cpu_offloaded_model, device)
 
    def enable_model_cpu_offload(self, gpu_id=0):
@@ -217,6 +222,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        self.final_offload_hook = hook
 
    @property
+   # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
    def _execution_device(self):
        r"""
        Returns the device on which the pipeline's models will be executed. After calling
@@ -237,12 +243,14 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
    def encode_prompt(
        self,
        prompt,
-       device,
-       num_images_per_prompt,
-       do_classifier_free_guidance,
+       device: Optional[torch.device] = None,
+       num_images_per_prompt: int = 1,
+       do_classifier_free_guidance: bool = True,
        negative_prompt=None,
        prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+       pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+       negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
        lora_scale: Optional[float] = None,
    ):
        r"""
@@ -268,9 +276,18 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
+           pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+               Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+               If not provided, pooled text embeddings will be generated from `prompt` input argument.
+           negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+               Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+               weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+               input argument.
            lora_scale (`float`, *optional*):
                A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
        """
+       device = device or self._execution_device
+
        # set lora scale so that monkey patched LoRA
        # function of text encoder can correctly access it
        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
@@ -399,6 +416,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
 
            negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
 
+       bs_embed = pooled_prompt_embeds.shape[0]
        pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
            bs_embed * num_images_per_prompt, -1
        )
@@ -408,20 +426,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
 
        return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
-
-       if self.safety_checker is None:
-           has_nsfw_concept = None
-       else:
-           if torch.is_tensor(image):
-               feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
-           else:
-               feature_extractor_input = self.image_processor.numpy_to_pil(image)
-           safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
-           image, has_nsfw_concept = self.safety_checker(
-               images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
-           )
-       return image, has_nsfw_concept
-
+   # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
    def prepare_extra_step_kwargs(self, generator, eta):
        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
@@ -448,6 +453,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        negative_prompt=None,
        prompt_embeds=None,
        negative_prompt_embeds=None,
+       pooled_prompt_embeds=None,
+       negative_pooled_prompt_embeds=None,
    ):
        if height % 8 != 0 or width % 8 != 0:
            raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -486,6 +493,17 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
                f" {negative_prompt_embeds.shape}."
            )
 
+       if prompt_embeds is not None and pooled_prompt_embeds is None:
+           raise ValueError(
+               "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+           )
+
+       if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+           raise ValueError(
+               "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+           )
+
+   # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
    def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
        if isinstance(generator, list) and len(generator) != batch_size:
@@ -535,6 +553,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        latents: Optional[torch.FloatTensor] = None,
        prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+       pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+       negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
@@ -588,6 +608,13 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
+           pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+               Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+               If not provided, pooled text embeddings will be generated from `prompt` input argument.
+           negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+               Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+               weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+               input argument.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -626,15 +653,23 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
                "not-safe-for-work" (nsfw) content, according to the `safety_checker`.
        """
        # 0. Default height and width to unet
-       height = height or self.unet.config.sample_size * self.vae_scale_factor
-       width = width or self.unet.config.sample_size * self.vae_scale_factor
+       height = height or self.default_sample_size * self.vae_scale_factor
+       width = width or self.default_sample_size * self.vae_scale_factor
 
        original_size = original_size or (height, width)
        target_size = target_size or (height, width)
 
        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
-           prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds
+           prompt,
+           height,
+           width,
+           callback_steps,
+           negative_prompt,
+           prompt_embeds,
+           negative_prompt_embeds,
+           pooled_prompt_embeds,
+           negative_pooled_prompt_embeds,
        )
 
        # 2. Define call parameters
@@ -669,6 +704,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
            negative_prompt,
            prompt_embeds=prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
+           pooled_prompt_embeds=pooled_prompt_embeds,
+           negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
            lora_scale=text_encoder_lora_scale,
        )
 
@@ -749,15 +786,18 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
        # make sure the VAE is in float32 mode, as it overflows in float16
        self.vae.to(dtype=torch.float32)
 
-       use_torch_2_0_or_xformers =
-
-
-
-
-
+       use_torch_2_0_or_xformers = isinstance(
+           self.vae.decoder.mid_block.attentions[0].processor,
+           (
+               AttnProcessor2_0,
+               XFormersAttnProcessor,
+               LoRAXFormersAttnProcessor,
+               LoRAAttnProcessor2_0,
+           ),
+       )
        # if xformers or torch_2_0 is used attention block does not need
        # to be in float32 which can save lots of memory
-       if
+       if use_torch_2_0_or_xformers:
            self.vae.post_quant_conv.to(latents.dtype)
            self.vae.decoder.conv_in.to(latents.dtype)
            self.vae.decoder.mid_block.to(latents.dtype)
@@ -765,27 +805,19 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
            latents = latents.float()
 
        if not output_type == "latent":
-           # CHECK there is problem here (PVP)
            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-           has_nsfw_concept = None
        else:
            image = latents
-
-           return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=None)
-
-       if has_nsfw_concept is None:
-           do_denormalize = [True] * image.shape[0]
-       else:
-           do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
+           return StableDiffusionXLPipelineOutput(images=image)
 
        image = self.watermark.apply_watermark(image)
-       image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
+       image = self.image_processor.postprocess(image, output_type=output_type)
 
        # Offload last model to CPU
        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
            self.final_offload_hook.offload()
 
        if not return_dict:
-           return (image, has_nsfw_concept)
+           return (image,)
 
-       return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
+       return StableDiffusionXLPipelineOutput(images=image)
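Taken together, the new `encode_prompt` defaults and the `pooled_prompt_embeds` / `negative_pooled_prompt_embeds` plumbing let all four embedding tensors be precomputed once and fed back into the pipeline call — useful for prompt weighting. A hedged usage sketch (the model id matches the SDXL 0.9 release current for diffusers 0.18.x):

    import torch
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16
    ).to("cuda")

    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt("an astronaut riding a horse")  # device/num_images now default

    image = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
    ).images[0]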
@@ -140,6 +140,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
140
140
|
|
141
141
|
self.watermark = StableDiffusionXLWatermarker()
|
142
142
|
|
143
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
143
144
|
def enable_vae_slicing(self):
|
144
145
|
r"""
|
145
146
|
Enable sliced VAE decoding.
|
@@ -149,6 +150,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
149
150
|
"""
|
150
151
|
self.vae.enable_slicing()
|
151
152
|
|
153
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
|
152
154
|
def disable_vae_slicing(self):
|
153
155
|
r"""
|
154
156
|
Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
|
@@ -156,6 +158,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
156
158
|
"""
|
157
159
|
self.vae.disable_slicing()
|
158
160
|
|
161
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
|
159
162
|
def enable_vae_tiling(self):
|
160
163
|
r"""
|
161
164
|
Enable tiled VAE decoding.
|
@@ -165,6 +168,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
165
168
|
"""
|
166
169
|
self.vae.enable_tiling()
|
167
170
|
|
171
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
|
168
172
|
def disable_vae_tiling(self):
|
169
173
|
r"""
|
170
174
|
Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
|
@@ -172,6 +176,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
172
176
|
"""
|
173
177
|
self.vae.disable_tiling()
|
174
178
|
|
179
|
+
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_sequential_cpu_offload
|
175
180
|
def enable_sequential_cpu_offload(self, gpu_id=0):
|
176
181
|
r"""
|
177
182
|
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
|
@@ -191,9 +196,10 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
191
196
|
self.to("cpu", silence_dtype_warnings=True)
|
192
197
|
torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
|
193
198
|
|
194
|
-
for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
|
199
|
+
for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
|
195
200
|
cpu_offload(cpu_offloaded_model, device)
|
196
201
|
|
202
|
+
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_model_cpu_offload
|
197
203
|
def enable_model_cpu_offload(self, gpu_id=0):
|
198
204
|
r"""
|
199
205
|
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
|
@@ -225,6 +231,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
225
231
|
self.final_offload_hook = hook
|
226
232
|
|
227
233
|
@property
|
234
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
|
228
235
|
def _execution_device(self):
|
229
236
|
r"""
|
230
237
|
Returns the device on which the pipeline's models will be executed. After calling
|
@@ -242,15 +249,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
242
249
|
return torch.device(module._hf_hook.execution_device)
|
243
250
|
return self.device
|
244
251
|
|
252
|
+
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
|
245
253
|
def encode_prompt(
|
246
254
|
self,
|
247
255
|
prompt,
|
248
|
-
device,
|
249
|
-
num_images_per_prompt,
|
250
|
-
do_classifier_free_guidance,
|
256
|
+
device: Optional[torch.device] = None,
|
257
|
+
num_images_per_prompt: int = 1,
|
258
|
+
do_classifier_free_guidance: bool = True,
|
251
259
|
negative_prompt=None,
|
252
260
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
253
261
|
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
262
|
+
pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
263
|
+
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
254
264
|
lora_scale: Optional[float] = None,
|
255
265
|
):
|
256
266
|
r"""
|
@@ -276,9 +286,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
276
286
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
277
287
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
278
288
|
argument.
|
289
|
+
pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
290
|
+
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
291
|
+
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
292
|
+
negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
293
|
+
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
294
|
+
weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
|
295
|
+
input argument.
|
279
296
|
lora_scale (`float`, *optional*):
|
280
297
|
A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
|
281
298
|
"""
|
299
|
+
device = device or self._execution_device
|
300
|
+
|
282
301
|
# set lora scale so that monkey patched LoRA
|
283
302
|
# function of text encoder can correctly access it
|
284
303
|
if lora_scale is not None and isinstance(self, LoraLoaderMixin):
|
@@ -327,13 +346,11 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
327
346
|
text_input_ids.to(device),
|
328
347
|
output_hidden_states=True,
|
329
348
|
)
|
349
|
+
|
330
350
|
# We are only ALWAYS interested in the pooled output of the final text encoder
|
331
351
|
pooled_prompt_embeds = prompt_embeds[0]
|
332
|
-
|
333
352
|
prompt_embeds = prompt_embeds.hidden_states[-2]
|
334
353
|
|
335
|
-
prompt_embeds = prompt_embeds
|
336
|
-
|
337
354
|
bs_embed, seq_len, _ = prompt_embeds.shape
|
338
355
|
# duplicate text embeddings for each generation per prompt, using mps friendly method
|
339
356
|
prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
|
@@ -349,10 +366,9 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
349
366
|
negative_prompt_embeds = torch.zeros_like(prompt_embeds)
|
350
367
|
negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
|
351
368
|
elif do_classifier_free_guidance and negative_prompt_embeds is None:
|
369
|
+
negative_prompt = negative_prompt or ""
|
352
370
|
uncond_tokens: List[str]
|
353
|
-
if
|
354
|
-
uncond_tokens = [""] * batch_size
|
355
|
-
elif prompt is not None and type(prompt) is not type(negative_prompt):
|
371
|
+
if prompt is not None and type(prompt) is not type(negative_prompt):
|
356
372
|
raise TypeError(
|
357
373
|
f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
|
358
374
|
f" {type(prompt)}."
|
@@ -389,7 +405,6 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
389
405
|
)
|
390
406
|
# We are only ALWAYS interested in the pooled output of the final text encoder
|
391
407
|
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
|
392
|
-
|
393
408
|
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
|
394
409
|
|
395
410
|
if do_classifier_free_guidance:
|
@@ -411,6 +426,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
411
426
|
|
412
427
|
negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
|
413
428
|
|
429
|
+
bs_embed = pooled_prompt_embeds.shape[0]
|
414
430
|
pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
|
415
431
|
bs_embed * num_images_per_prompt, -1
|
416
432
|
)
|
@@ -420,20 +436,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
420
436
|
|
421
437
|
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
|
422
438
|
|
423
|
-
|
424
|
-
if self.safety_checker is None:
|
425
|
-
has_nsfw_concept = None
|
426
|
-
else:
|
427
|
-
if torch.is_tensor(image):
|
428
|
-
feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
|
429
|
-
else:
|
430
|
-
feature_extractor_input = self.image_processor.numpy_to_pil(image)
|
431
|
-
safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
|
432
|
-
image, has_nsfw_concept = self.safety_checker(
|
433
|
-
images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
|
434
|
-
)
|
435
|
-
return image, has_nsfw_concept
|
436
|
-
|
439
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
|
437
440
|
def prepare_extra_step_kwargs(self, generator, eta):
|
438
441
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
439
442
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
@@ -624,6 +627,8 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
624
627
|
latents: Optional[torch.FloatTensor] = None,
|
625
628
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
626
629
|
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
630
|
+
pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
631
|
+
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
627
632
|
output_type: Optional[str] = "pil",
|
628
633
|
return_dict: bool = True,
|
629
634
|
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
@@ -683,6 +688,13 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
683
688
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
684
689
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
685
690
|
argument.
|
691
|
+
pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
692
|
+
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
693
|
+
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
694
|
+
negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
695
|
+
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
696
|
+
weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
|
697
|
+
input argument.
|
686
698
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
687
699
|
The output format of the generate image. Choose between
|
688
700
|
[PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
|
@@ -759,6 +771,8 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
759
771
|
negative_prompt,
|
760
772
|
prompt_embeds=prompt_embeds,
|
761
773
|
negative_prompt_embeds=negative_prompt_embeds,
|
774
|
+
pooled_prompt_embeds=pooled_prompt_embeds,
|
775
|
+
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
|
762
776
|
lora_scale=text_encoder_lora_scale,
|
763
777
|
)
|
764
778
|
|
@@ -845,15 +859,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
845
859
|
# make sure the VAE is in float32 mode, as it overflows in float16
|
846
860
|
self.vae.to(dtype=torch.float32)
|
847
861
|
|
848
|
-
use_torch_2_0_or_xformers =
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
862
|
+
use_torch_2_0_or_xformers = isinstance(
|
863
|
+
self.vae.decoder.mid_block.attentions[0].processor,
|
864
|
+
(
|
865
|
+
AttnProcessor2_0,
|
866
|
+
XFormersAttnProcessor,
|
867
|
+
LoRAXFormersAttnProcessor,
|
868
|
+
LoRAAttnProcessor2_0,
|
869
|
+
),
|
870
|
+
)
|
854
871
|
# if xformers or torch_2_0 is used attention block does not need
|
855
872
|
# to be in float32 which can save lots of memory
|
856
|
-
if
|
873
|
+
if use_torch_2_0_or_xformers:
|
857
874
|
self.vae.post_quant_conv.to(latents.dtype)
|
858
875
|
self.vae.decoder.conv_in.to(latents.dtype)
|
859
876
|
self.vae.decoder.mid_block.to(latents.dtype)
|
@@ -862,24 +879,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
|
|
862
879
|
|
863
880
|
if not output_type == "latent":
|
864
881
|
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
|
865
|
-
has_nsfw_concept = None
|
866
882
|
else:
|
867
883
|
image = latents
|
868
|
-
return StableDiffusionXLPipelineOutput(images=image
|
869
|
-
|
870
|
-
if has_nsfw_concept is None:
|
871
|
-
do_denormalize = [True] * image.shape[0]
|
872
|
-
else:
|
873
|
-
do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
|
884
|
+
return StableDiffusionXLPipelineOutput(images=image)
|
874
885
|
|
875
886
|
image = self.watermark.apply_watermark(image)
|
876
|
-
image = self.image_processor.postprocess(image, output_type=output_type
|
887
|
+
image = self.image_processor.postprocess(image, output_type=output_type)
|
877
888
|
|
878
889
|
# Offload last model to CPU
|
879
890
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
880
891
|
self.final_offload_hook.offload()
|
881
892
|
|
882
893
|
if not return_dict:
|
883
|
-
return (image,
|
894
|
+
return (image,)
|
884
895
|
|
885
|
-
return StableDiffusionXLPipelineOutput(images=image
|
896
|
+
return StableDiffusionXLPipelineOutput(images=image)
|
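The hunk above removes safety-checker bookkeeping that the SDXL pipeline never populates (`has_nsfw_concept`, `do_denormalize`) and completes the three return paths. A hedged usage sketch of those paths, reusing `pipe` and `init_image` from the earlier sketch:

# Default: a StableDiffusionXLPipelineOutput whose only field is .images.
result = pipe(image=init_image, prompt="a castle at dusk")
final_image = result.images[0]

# return_dict=False: a plain one-element tuple instead of the output class.
(images,) = pipe(image=init_image, prompt="a castle at dusk", return_dict=False)

# output_type="latent": returns before VAE decoding, watermarking and
# post-processing; .images then holds raw latents.
latents = pipe(image=init_image, prompt="a castle at dusk", output_type="latent").images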
@@ -1,9 +1,9 @@
|
|
1
|
-
diffusers/__init__.py,sha256=
|
2
|
-
diffusers/configuration_utils.py,sha256
|
1
|
+
diffusers/__init__.py,sha256=Vtoe0ie8nREHRwBNNwzbyQ2rwqLTcB4399y6DBFTOok,9329
|
2
|
+
diffusers/configuration_utils.py,sha256=--Nwf_FViQXq71M8PcgUUjT_YoLV1WYqV49Fnk-amkk,30369
|
3
3
|
diffusers/dependency_versions_check.py,sha256=T2AQMFfOGMCULAqRAE8zf1VE5j7GFxxs7SfEuhTY4lA,1756
|
4
4
|
diffusers/dependency_versions_table.py,sha256=TnzJqBXnJYMXeMw61Lgq_QlTkjWydwOKDIKbV0RXG6Q,1446
|
5
5
|
diffusers/image_processor.py,sha256=VqdToqZY-Xdb0sqibwVn1A9gdGOU3OvgQpr67mnMWGg,13700
|
6
|
-
diffusers/loaders.py,sha256=
|
6
|
+
diffusers/loaders.py,sha256=9trJ4QdgKOmfqguAKHq73fu5VDjw13krtgyJq7AnpQw,75161
|
7
7
|
diffusers/optimization.py,sha256=KZpFO98pzgt1l-etti_7k5c-EK9WEY3-XossN6VEGrs,14546
|
8
8
|
diffusers/pipeline_utils.py,sha256=dJVuXQ_ZBHkW64dwPbIPM51QnqQKIp9-WSIhRQYlJg4,1147
|
9
9
|
diffusers/training_utils.py,sha256=TEuw7ro2RT35ujfMW2DKzb1KZpF4-HfuKSZ1NNnIIvI,13195
|
@@ -16,7 +16,7 @@ diffusers/experimental/rl/value_guided_sampling.py,sha256=iIhf1gc2QP7Jx4HrsoOyRC
|
|
16
16
|
diffusers/models/__init__.py,sha256=MDG83d8C1YGGSnGNwi9sG6c33_FEaMGS3BVGnaqWJqQ,1446
|
17
17
|
diffusers/models/activations.py,sha256=cWe7qw4wR626ADw-abcV3lI1v5Vim_R_eNMc5jPlaLo,297
|
18
18
|
diffusers/models/attention.py,sha256=Nfmze9IvGR5a6ir9o0Z4DbAQ8repJxBo2_t4fDsnvHw,15197
|
19
|
-
diffusers/models/attention_flax.py,sha256=
|
19
|
+
diffusers/models/attention_flax.py,sha256=6IOINRK5flDgnzsLiSLIfhBnDtdY9LyhcDIUXVS_Gag,18142
|
20
20
|
diffusers/models/attention_processor.py,sha256=04g9405fWhb-C0xO9cnn-LfAMcSwxZ9fOzYrX98aa6A,70119
|
21
21
|
diffusers/models/autoencoder_kl.py,sha256=qM2oRqJROHvA3PSwMDmNISQzK3oFmgJiRRzvHZw9dHQ,17913
|
22
22
|
diffusers/models/controlnet.py,sha256=OzCVtpmlJXTfIze3Bmc6p7lGFirxvlI-MroHL7HQ5mQ,33086
|
@@ -28,7 +28,7 @@ diffusers/models/embeddings_flax.py,sha256=87ysODCdTERpYfH-EDhElOUyCAu8z6-xIQCqL
|
|
28
28
|
diffusers/models/modeling_flax_pytorch_utils.py,sha256=yFQHU86DdvrzFLfkTbyZZ0_PWKrjnp08s46dD-wf_tw,4601
|
29
29
|
diffusers/models/modeling_flax_utils.py,sha256=0ailGzoCLU5-81rn048e2UJEr0S1lHGBQGqpOJzWfWQ,26071
|
30
30
|
diffusers/models/modeling_pytorch_flax_utils.py,sha256=5dt6mC956MYrIMp8Owvx8QQv8xsfik6vu0frgb_c6HE,6974
|
31
|
-
diffusers/models/modeling_utils.py,sha256=
|
31
|
+
diffusers/models/modeling_utils.py,sha256=vHf-AWIwuTvyjtOCbTryupWmQLxiujNhBfVL0hmop_k,46588
|
32
32
|
diffusers/models/prior_transformer.py,sha256=5A8Tgq4VXkjH0ib05kPHXPObekLYdrRwuCgnGvoMVN4,16574
|
33
33
|
diffusers/models/resnet.py,sha256=y9FIuXYUTHYA3AFUeDBwiHJVu0crM0fMRnzEJ3ZtVf4,35294
|
34
34
|
diffusers/models/resnet_flax.py,sha256=VKF-ti1jlH_GnlWRy9dY6ETc-W9ZitfQoNjmrFAQxuU,4021
|
@@ -50,7 +50,7 @@ diffusers/models/vq_model.py,sha256=_98GsNUGg3HxcC97zQSgxEPVuDNvn1DcJP6TCTpGLVE,
|
|
50
50
|
diffusers/pipelines/__init__.py,sha256=pjJh4SXSHjSBtzzAsiuQp64YQ03xPMdgTzK-0-iV9Ew,7009
|
51
51
|
diffusers/pipelines/onnx_utils.py,sha256=M-6GBVRFji_ik5x1CMxrz9r5oEBr9TTblqLsI1HfiS4,8282
|
52
52
|
diffusers/pipelines/pipeline_flax_utils.py,sha256=CLjAhcwfBJ1xTbdRbyWHGdcd5uRJDoXDdxruuK2t2iM,25924
|
53
|
-
diffusers/pipelines/pipeline_utils.py,sha256=
|
53
|
+
diffusers/pipelines/pipeline_utils.py,sha256=2P6oTVvZcs33-LoWUQosYkdsl1bEKE3MfnQdhkjubRw,72464
|
54
54
|
diffusers/pipelines/alt_diffusion/__init__.py,sha256=rCOBtGQ7xi3DahUXY8r5ICt_t6S0ogp4uDJL9q4avso,1346
|
55
55
|
diffusers/pipelines/alt_diffusion/modeling_roberta_series.py,sha256=_UC4IxHAg2QAFtw4yCvo2eLIDBRmg2JvvtOr6k5PFC8,5580
|
56
56
|
diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py,sha256=YgUvsnah_cIXNwzJgxE87fftWox7leXOY8lzZeph7c8,40641
|
@@ -127,7 +127,7 @@ diffusers/pipelines/spectrogram_diffusion/midi_utils.py,sha256=HmOSMSaKZlloW8J6m
|
|
127
127
|
diffusers/pipelines/spectrogram_diffusion/notes_encoder.py,sha256=Yq3W0lkAMGhx5pGklTvomBHjqR1nAVALBcYlzZBSQ90,2921
|
128
128
|
diffusers/pipelines/spectrogram_diffusion/pipeline_spectrogram_diffusion.py,sha256=GAHovdMWMhmGgS02kFOaS7_Lq9AJmTxrBZC0VElHwBQ,8657
|
129
129
|
diffusers/pipelines/stable_diffusion/__init__.py,sha256=nBYUiO6TbCsqNfImNCPi1aE-Q35Lc5r9B7qWb9TDjcM,6164
|
130
|
-
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=
|
130
|
+
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=wYHT2MGLa6LFcmlvtxgDCQ5tqZCqejur2hN-0YL0GsA,69501
|
131
131
|
diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py,sha256=sA76ZiTUVOTiCMDss7z3nouqg8czJwBmhX7OPuYheWk,43554
|
132
132
|
diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py,sha256=Pbprq5sXlbS6JPP44eOzzm0FrwsccrHoaXuFWY_Kx38,20922
|
133
133
|
diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_controlnet.py,sha256=rjxVeyAmMTQiTiQF7Q9y3BYU45jCImKZGx37ir8zpM8,1257
|
@@ -145,7 +145,7 @@ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py,sha2
|
|
145
145
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py,sha256=-k3M22p4KXDkUJAnxj4xU9VM_QBBqs_pV0XHlfYzsKk,80921
|
146
146
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py,sha256=xOvGZvBBvB1Ee_9B46bKpRK5C9SuxbNZhrQ44nvIYsQ,23137
|
147
147
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py,sha256=XPNN7lINZoezjS2ciifKfIvRXotUbB_dyByaAPOE3Vs,42149
|
148
|
-
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=
|
148
|
+
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=0Q9m9yJ5rED3U1vyjmrHVft9jIqB35KkfDerc_VYJNg,55140
|
149
149
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py,sha256=gzklmzg25O3y48wNubrl_jzKwzZlgiperjyV6OqdvxU,42148
|
150
150
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py,sha256=g8yEk161s1CO8NDVm6ZTMdONtilXDIKQWI6dMnRk6Bs,40919
|
151
151
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py,sha256=UM-T14SgwlIk9is3QDQx7IsTWU-_ZsD1iA4rCO9-3Rk,31679
|
@@ -156,7 +156,7 @@ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py,sha25
|
|
156
156
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py,sha256=cnwmjjTQA5lMd-pK-MhnwLF8bK-pwZu0S3xiHLO2WFk,43302
|
157
157
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py,sha256=_EtteuRKzWruht97EpQO7zrqoyhGyZUUgDFuw04mW5M,62705
|
158
158
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py,sha256=xiBT7l8nha5HLg2MEeOYozUwmNIUZzbW1mjgt6b7JU8,40491
|
159
|
-
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=
|
159
|
+
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=TOJge0fcAt10pg-hJdjhQdMi53pBzBrcx7x6vIGMZVc,38610
|
160
160
|
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py,sha256=ef5W_IynHqocHCWJcX32MUxRPvShH3spsv0RA_lBlTk,46256
|
161
161
|
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py,sha256=YSwcDOwL1sE6ItdYm1ZuYb3uZVAf-DzuhtndV_Auqzw,40305
|
162
162
|
diffusers/pipelines/stable_diffusion/safety_checker.py,sha256=zLs3meGi6JiRYlHntPiBEaU9_JjYcZnzrPa5picFiG4,5734
|
@@ -165,9 +165,9 @@ diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py,sha256=RE
|
|
165
165
|
diffusers/pipelines/stable_diffusion_safe/__init__.py,sha256=FAuvPLSYCLDzJ1d2GntTwQXpxgABEaoLrj5LdQOtxpA,2502
|
166
166
|
diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py,sha256=yZMsvIdDAhF7maJOpc9UWSIUbWpIghOypzgo4-vAI0A,37886
|
167
167
|
diffusers/pipelines/stable_diffusion_safe/safety_checker.py,sha256=lEXvS-_WCcVpje14hoajJG2Z4jlWs0UsID3IqWTnOys,5049
|
168
|
-
diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256
|
169
|
-
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=
|
170
|
-
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=
|
168
|
+
diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=-RFjtUQxnCEPSF0Foq90HWIjyHblHOH0eHoNg1dqj68,953
|
169
|
+
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=MTG8Ym65rS3vvALVHWkHHP5cEKMcjpSnYHZuh_lwKcU,41570
|
170
|
+
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=USKMxXkFUeDJQr77Ecw1QlJieiet37KdQB53wWqpCc4,46168
|
171
171
|
diffusers/pipelines/stable_diffusion_xl/watermark.py,sha256=22Pg7TXApd4oRBvyJDh5B5L6--Zj7hKaYj8dHSTsGzQ,1142
|
172
172
|
diffusers/pipelines/stochastic_karras_ve/__init__.py,sha256=StxEhuNuCeEY3qv3ZIcBfXsaxDH3JmWeuHx1xCHnYRI,60
|
173
173
|
diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py,sha256=zZn4jZ8iHJSsoMvStG3l4WvQ6wAtKjK0LDjLRQA3PLU,5669
|
@@ -254,9 +254,9 @@ diffusers/utils/outputs.py,sha256=l5RdKO6SRnnz7fsXsmmnkOyCf_0z35kwfkDbnhCFeAc,36
|
|
254
254
|
diffusers/utils/pil_utils.py,sha256=F7M3QWYQyRcLNsS8876wgKqOnhzg8hNTPHQy6Q-jYj0,1423
|
255
255
|
diffusers/utils/testing_utils.py,sha256=TiKwlhR4SvEIIkAOrF11qYNg27p_tVp0ifJgEW2mNAk,21197
|
256
256
|
diffusers/utils/torch_utils.py,sha256=4gRMtlH81IrbYh_pfR0ZkDNbuxmVX03fmR6xrDTZIP0,3378
|
257
|
-
diffusers-0.18.
|
258
|
-
diffusers-0.18.
|
259
|
-
diffusers-0.18.
|
260
|
-
diffusers-0.18.
|
261
|
-
diffusers-0.18.
|
262
|
-
diffusers-0.18.
|
257
|
+
diffusers-0.18.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
258
|
+
diffusers-0.18.2.dist-info/METADATA,sha256=iO5QVnb_Ri2SVl5YaXfVn7P6JsyQJ2PiTcQ5aNNubxY,17540
|
259
|
+
diffusers-0.18.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
260
|
+
diffusers-0.18.2.dist-info/entry_points.txt,sha256=VULXr1th-UU5J0Ou_l0If6E4CY4HSSiMElweZ58u9H0,73
|
261
|
+
diffusers-0.18.2.dist-info/top_level.txt,sha256=axJl2884vMSvhzrFrSoht36QXA_6gZN9cKtg4xOO72o,10
|
262
|
+
diffusers-0.18.2.dist-info/RECORD,,
|
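The entries above are the rebuilt RECORD manifest, which is why every content change in this release is mirrored here. Each line has the form `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64 encoding of the raw SHA-256 hash with trailing `=` padding stripped, per the wheel spec (PEP 376/427). A small sketch that reproduces an entry from an unpacked wheel (the path is illustrative):

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # path,sha256=<urlsafe b64 of the raw digest, '=' padding stripped>,<size in bytes>
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# Run against the unpacked 0.18.2 wheel, this should reproduce, e.g.:
# diffusers/__init__.py,sha256=Vtoe0ie8nREHRwBNNwzbyQ2rwqLTcB4399y6DBFTOok,9329
print(record_entry("diffusers/__init__.py"))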
File without changes
|
File without changes
|
File without changes
|