diffusers 0.18.0__py3-none-any.whl → 0.18.2__py3-none-any.whl
- diffusers/__init__.py +1 -1
- diffusers/configuration_utils.py +2 -2
- diffusers/loaders.py +5 -5
- diffusers/models/attention_flax.py +10 -5
- diffusers/models/modeling_utils.py +1 -1
- diffusers/pipelines/pipeline_utils.py +13 -4
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +87 -34
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -7
- diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +72 -40
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +53 -42
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/METADATA +1 -1
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/RECORD +18 -18
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
- {diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
diffusers/__init__.py
CHANGED
diffusers/configuration_utils.py
CHANGED
@@ -607,7 +607,7 @@ def register_to_config(init):
 
         # Take note of the parameters that were not present in the loaded config
         if len(set(new_kwargs.keys()) - set(init_kwargs)) > 0:
-            new_kwargs["_use_default_values"] = set(new_kwargs.keys()) - set(init_kwargs)
+            new_kwargs["_use_default_values"] = list(set(new_kwargs.keys()) - set(init_kwargs))
 
         new_kwargs = {**config_init_kwargs, **new_kwargs}
         getattr(self, "register_to_config")(**new_kwargs)
@@ -655,7 +655,7 @@ def flax_register_to_config(cls):
 
         # Take note of the parameters that were not present in the loaded config
         if len(set(new_kwargs.keys()) - set(init_kwargs)) > 0:
-            new_kwargs["_use_default_values"] = set(new_kwargs.keys()) - set(init_kwargs)
+            new_kwargs["_use_default_values"] = list(set(new_kwargs.keys()) - set(init_kwargs))
 
         getattr(self, "register_to_config")(**new_kwargs)
         original_init(self, *args, **kwargs)
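The `_use_default_values` change in both hunks is a serialization fix: model configs are written out as JSON, and Python's `json` module cannot encode a `set`. A minimal standalone illustration (the config keys here are made up):

    import json

    new_kwargs = {"sample_size": 64, "_use_default_values": {"norm_eps", "act_fn"}}

    try:
        json.dumps(new_kwargs)  # a set is not JSON-serializable
    except TypeError as err:
        print(err)

    # Converting the set to a list, as the patch does, keeps the config serializable:
    new_kwargs["_use_default_values"] = list(new_kwargs["_use_default_values"])
    print(json.dumps(new_kwargs))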
diffusers/loaders.py
CHANGED
@@ -177,7 +177,7 @@ class UNet2DConditionLoadersMixin:
 
         if use_safetensors and not is_safetensors_available():
             raise ValueError(
-                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
             )
 
         allow_pickle = False
@@ -589,7 +589,7 @@ class TextualInversionLoaderMixin:
 
         if use_safetensors and not is_safetensors_available():
             raise ValueError(
-                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
             )
 
         allow_pickle = False
@@ -806,7 +806,7 @@ class LoraLoaderMixin:
 
         if use_safetensors and not is_safetensors_available():
             raise ValueError(
-                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
             )
 
         allow_pickle = False
@@ -1054,7 +1054,7 @@ class LoraLoaderMixin:
 
         if use_safetensors and not is_safetensors_available():
             raise ValueError(
-                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
            )
 
         allow_pickle = False
@@ -1394,7 +1394,7 @@ class FromSingleFileMixin:
         use_auth_token = kwargs.pop("use_auth_token", None)
         revision = kwargs.pop("revision", None)
         extract_ema = kwargs.pop("extract_ema", False)
-        image_size = kwargs.pop("image_size",
+        image_size = kwargs.pop("image_size", None)
         scheduler_type = kwargs.pop("scheduler_type", "pndm")
         num_in_channels = kwargs.pop("num_in_channels", None)
         upcast_attention = kwargs.pop("upcast_attention", None)
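With `image_size` now defaulting to `None`, `from_single_file` leaves the resolution to be inferred downstream instead of relying on a hard-coded value. A hedged usage sketch (the checkpoint path is a placeholder for any original-format Stable Diffusion file):

    from diffusers import StableDiffusionPipeline

    # Placeholder path; point this at a real .ckpt/.safetensors checkpoint.
    pipe = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly.safetensors")
    image = pipe("a photo of a cat").images[0]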
diffusers/models/attention_flax.py
CHANGED
@@ -152,6 +152,7 @@ class FlaxAttention(nn.Module):
         self.value = nn.Dense(inner_dim, use_bias=False, dtype=self.dtype, name="to_v")
 
         self.proj_attn = nn.Dense(self.query_dim, dtype=self.dtype, name="to_out_0")
+        self.dropout_layer = nn.Dropout(rate=self.dropout)
 
     def reshape_heads_to_batch_dim(self, tensor):
         batch_size, seq_len, dim = tensor.shape
@@ -214,7 +215,7 @@ class FlaxAttention(nn.Module):
 
         hidden_states = self.reshape_batch_dim_to_heads(hidden_states)
         hidden_states = self.proj_attn(hidden_states)
-        return hidden_states
+        return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
 class FlaxBasicTransformerBlock(nn.Module):
@@ -260,6 +261,7 @@ class FlaxBasicTransformerBlock(nn.Module):
         self.norm1 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
         self.norm2 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
         self.norm3 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
+        self.dropout_layer = nn.Dropout(rate=self.dropout)
 
     def __call__(self, hidden_states, context, deterministic=True):
         # self attention
@@ -280,7 +282,7 @@ class FlaxBasicTransformerBlock(nn.Module):
         hidden_states = self.ff(self.norm3(hidden_states), deterministic=deterministic)
         hidden_states = hidden_states + residual
 
-        return hidden_states
+        return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
 class FlaxTransformer2DModel(nn.Module):
@@ -356,6 +358,8 @@ class FlaxTransformer2DModel(nn.Module):
             dtype=self.dtype,
         )
 
+        self.dropout_layer = nn.Dropout(rate=self.dropout)
+
     def __call__(self, hidden_states, context, deterministic=True):
         batch, height, width, channels = hidden_states.shape
         residual = hidden_states
@@ -378,7 +382,7 @@ class FlaxTransformer2DModel(nn.Module):
         hidden_states = self.proj_out(hidden_states)
 
         hidden_states = hidden_states + residual
-        return hidden_states
+        return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
 class FlaxFeedForward(nn.Module):
@@ -409,7 +413,7 @@ class FlaxFeedForward(nn.Module):
         self.net_2 = nn.Dense(self.dim, dtype=self.dtype)
 
     def __call__(self, hidden_states, deterministic=True):
-        hidden_states = self.net_0(hidden_states)
+        hidden_states = self.net_0(hidden_states, deterministic=deterministic)
         hidden_states = self.net_2(hidden_states)
         return hidden_states
 
@@ -434,8 +438,9 @@ class FlaxGEGLU(nn.Module):
     def setup(self):
         inner_dim = self.dim * 4
         self.proj = nn.Dense(inner_dim * 2, dtype=self.dtype)
+        self.dropout_layer = nn.Dropout(rate=self.dropout)
 
     def __call__(self, hidden_states, deterministic=True):
         hidden_states = self.proj(hidden_states)
         hidden_linear, hidden_gelu = jnp.split(hidden_states, 2, axis=2)
-        return hidden_linear * nn.gelu(hidden_gelu)
+        return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic)
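These hunks wire an `nn.Dropout` into every Flax attention module and thread the existing `deterministic` flag through to it, so dropout is active during training and a no-op at inference. A minimal self-contained sketch of the same pattern (a toy module, not the diffusers code):

    import jax
    import flax.linen as nn

    class TinyBlock(nn.Module):
        dropout: float = 0.1

        @nn.compact
        def __call__(self, x, deterministic=True):
            x = nn.Dense(16)(x)
            # No-op when deterministic=True; when False, Flax requires an RNG
            # under the "dropout" key to sample the mask.
            return nn.Dropout(rate=self.dropout)(x, deterministic=deterministic)

    x = jax.numpy.ones((1, 8))
    model = TinyBlock()
    params = model.init(jax.random.PRNGKey(0), x)
    y_eval = model.apply(params, x, deterministic=True)
    y_train = model.apply(params, x, deterministic=False, rngs={"dropout": jax.random.PRNGKey(1)})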
diffusers/models/modeling_utils.py
CHANGED
@@ -456,7 +456,7 @@ class ModelMixin(torch.nn.Module):
 
         if use_safetensors and not is_safetensors_available():
             raise ValueError(
-                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
             )
 
         allow_pickle = False
diffusers/pipelines/pipeline_utils.py
CHANGED
@@ -204,7 +204,7 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLike], str]:
     transformers_index_format = r"\d{5}-of-\d{5}"
 
     if variant is not None:
-        # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.
+        # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.safetensors`
         variant_file_re = re.compile(
             rf"({'|'.join(weight_prefixes)})\.({variant}|{variant}-{transformers_index_format})\.({'|'.join(weight_suffixs)})$"
         )
@@ -213,7 +213,7 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLike], str]:
         rf"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.index\.{variant}\.json$"
     )
 
-    # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.
+    # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetensors`
     non_variant_file_re = re.compile(
         rf"({'|'.join(weight_prefixes)})(-{transformers_index_format})?\.({'|'.join(weight_suffixs)})$"
     )
@@ -1168,7 +1168,7 @@ class DiffusionPipeline(ConfigMixin):
 
         if use_safetensors and not is_safetensors_available():
             raise ValueError(
-                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
             )
 
         allow_pickle = False
@@ -1213,6 +1213,15 @@ class DiffusionPipeline(ConfigMixin):
         filenames = {sibling.rfilename for sibling in info.siblings}
         model_filenames, variant_filenames = variant_compatible_siblings(filenames, variant=variant)
 
+        if len(variant_filenames) == 0 and variant is not None:
+            deprecation_message = (
+                f"You are trying to load the model files of the `variant={variant}`, but no such modeling files are available."
+                f"The default model files: {model_filenames} will be loaded instead. Make sure to not load from `variant={variant}`"
+                "if such variant modeling files are not available. Doing so will lead to an error in v0.22.0 as defaulting to non-variant"
+                "modeling files is deprecated."
+            )
+            deprecate("no variant default", "0.22.0", deprecation_message, standard_warn=False)
+
         # remove ignored filenames
         model_filenames = set(model_filenames) - set(ignore_filenames)
         variant_filenames = set(variant_filenames) - set(ignore_filenames)
@@ -1302,7 +1311,7 @@ class DiffusionPipeline(ConfigMixin):
         snapshot_folder = Path(config_file).parent
         pipeline_is_cached = all((snapshot_folder / f).is_file() for f in expected_files)
 
-        if pipeline_is_cached:
+        if pipeline_is_cached and not force_download:
             # if the pipeline is cached, we can directly return it
             # else call snapshot_download
             return snapshot_folder
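The `force_download` fix is the behavioral one here: previously a fully cached pipeline short-circuited and returned the local snapshot even when the caller asked for a re-download. A hedged sketch of the now-honored flag:

    from diffusers import DiffusionPipeline

    # With the fix, force_download=True bypasses the local-cache shortcut and
    # re-fetches the snapshot; before, the cached folder was returned anyway.
    local_path = DiffusionPipeline.download(
        "runwayml/stable-diffusion-v1-5",
        force_download=True,
    )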
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py
CHANGED
@@ -24,6 +24,7 @@ from transformers import (
     AutoFeatureExtractor,
     BertTokenizerFast,
     CLIPImageProcessor,
+    CLIPTextConfig,
     CLIPTextModel,
     CLIPTextModelWithProjection,
     CLIPTokenizer,
@@ -48,7 +49,7 @@ from ...schedulers import (
     PNDMScheduler,
     UnCLIPScheduler,
 )
-from ...utils import is_omegaconf_available, is_safetensors_available, logging
+from ...utils import is_accelerate_available, is_omegaconf_available, is_safetensors_available, logging
 from ...utils.import_utils import BACKENDS_MAPPING
 from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from ..paint_by_example import PaintByExampleImageEncoder
@@ -57,6 +58,10 @@ from .safety_checker import StableDiffusionSafetyChecker
 from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
 
 
+if is_accelerate_available():
+    from accelerate import init_empty_weights
+    from accelerate.utils import set_module_tensor_to_device
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -391,8 +396,8 @@ def convert_ldm_unet_checkpoint(
 
     # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
    if sum(k.startswith("model_ema") for k in keys) > 100 and extract_ema:
-        print(f"Checkpoint {path} has both EMA and non-EMA weights.")
-        print(
+        logger.warning(f"Checkpoint {path} has both EMA and non-EMA weights.")
+        logger.warning(
            "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
            " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
        )
@@ -402,7 +407,7 @@ def convert_ldm_unet_checkpoint(
             unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key)
     else:
         if sum(k.startswith("model_ema") for k in keys) > 100:
-            print(
+            logger.warning(
                 "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
                 " weights (usually better for inference), please make sure to add the `--extract_ema` flag."
             )
@@ -770,11 +775,12 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
 
 
 def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
-    text_model = (
-        CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
-        if text_encoder is None
-        else text_encoder
-    )
+    if text_encoder is None:
+        config_name = "openai/clip-vit-large-patch14"
+        config = CLIPTextConfig.from_pretrained(config_name)
+
+        with init_empty_weights():
+            text_model = CLIPTextModel(config)
 
     keys = list(checkpoint.keys())
 
@@ -787,7 +793,8 @@ def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
         if key.startswith(prefix):
             text_model_dict[key[len(prefix + ".") :]] = checkpoint[key]
 
-    text_model.load_state_dict(text_model_dict)
+    for param_name, param in text_model_dict.items():
+        set_module_tensor_to_device(text_model, param_name, "cpu", value=param)
 
     return text_model
 
@@ -884,14 +891,26 @@ def convert_paint_by_example_checkpoint(checkpoint):
     return model
 
 
-def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
+def convert_open_clip_checkpoint(
+    checkpoint, config_name, prefix="cond_stage_model.model.", has_projection=False, **config_kwargs
+):
     # text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")
-    text_model = CLIPTextModelWithProjection.from_pretrained(
-        "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
-    )
+    # text_model = CLIPTextModelWithProjection.from_pretrained(
+    #     "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
+    # )
+    config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs)
+
+    with init_empty_weights():
+        text_model = CLIPTextModelWithProjection(config) if has_projection else CLIPTextModel(config)
 
     keys = list(checkpoint.keys())
 
+    keys_to_ignore = []
+    if config_name == "stabilityai/stable-diffusion-2" and config.num_hidden_layers == 23:
+        # make sure to remove all keys > 22
+        keys_to_ignore += [k for k in keys if k.startswith("cond_stage_model.model.transformer.resblocks.23")]
+        keys_to_ignore += ["cond_stage_model.model.text_projection"]
+
     text_model_dict = {}
 
     if prefix + "text_projection" in checkpoint:
@@ -902,8 +921,8 @@ def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
     text_model_dict["text_model.embeddings.position_ids"] = text_model.text_model.embeddings.get_buffer("position_ids")
 
     for key in keys:
-        if "resblocks.23" in key:
-            continue
+        if key in keys_to_ignore:
+            continue
         if key[len(prefix) :] in textenc_conversion_map:
             if key.endswith("text_projection"):
                 value = checkpoint[key].T
@@ -931,7 +950,8 @@ def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
 
         text_model_dict[new_key] = checkpoint[key]
 
-    text_model.load_state_dict(text_model_dict)
+    for param_name, param in text_model_dict.items():
+        set_module_tensor_to_device(text_model, param_name, "cpu", value=param)
 
     return text_model
 
@@ -1061,7 +1081,7 @@ def convert_controlnet_checkpoint(
 def download_from_original_stable_diffusion_ckpt(
     checkpoint_path: str,
     original_config_file: str = None,
-    image_size: int =
+    image_size: Optional[int] = None,
     prediction_type: str = None,
     model_type: str = None,
     extract_ema: bool = False,
@@ -1144,6 +1164,7 @@ def download_from_original_stable_diffusion_ckpt(
         LDMTextToImagePipeline,
         PaintByExamplePipeline,
         StableDiffusionControlNetPipeline,
+        StableDiffusionInpaintPipeline,
         StableDiffusionPipeline,
         StableDiffusionXLImg2ImgPipeline,
         StableDiffusionXLPipeline,
@@ -1166,12 +1187,9 @@ def download_from_original_stable_diffusion_ckpt(
         if not is_safetensors_available():
             raise ValueError(BACKENDS_MAPPING["safetensors"][1])
 
-        from safetensors import safe_open
+        from safetensors.torch import load_file as safe_load
 
-        checkpoint = {}
-        with safe_open(checkpoint_path, framework="pt", device="cpu") as f:
-            for key in f.keys():
-                checkpoint[key] = f.get_tensor(key)
+        checkpoint = safe_load(checkpoint_path, device="cpu")
     else:
         if device is None:
             device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -1183,7 +1201,7 @@ def download_from_original_stable_diffusion_ckpt(
     if "global_step" in checkpoint:
         global_step = checkpoint["global_step"]
     else:
-        print("global_step key not found in model")
+        logger.debug("global_step key not found in model")
         global_step = None
 
     # NOTE: this while loop isn't great but this controlnet checkpoint has one additional
@@ -1230,8 +1248,15 @@ def download_from_original_stable_diffusion_ckpt(
             model_type = "SDXL"
         else:
             model_type = "SDXL-Refiner"
+        if image_size is None:
+            image_size = 1024
 
-    if num_in_channels is
+    if num_in_channels is None and pipeline_class == StableDiffusionInpaintPipeline:
+        num_in_channels = 9
+    elif num_in_channels is None:
+        num_in_channels = 4
+
+    if "unet_config" in original_config.model.params:
         original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
 
     if (
@@ -1263,7 +1288,6 @@ def download_from_original_stable_diffusion_ckpt(
     num_train_timesteps = getattr(original_config.model.params, "timesteps", None) or 1000
 
     if model_type in ["SDXL", "SDXL-Refiner"]:
-        image_size = 1024
         scheduler_dict = {
             "beta_schedule": "scaled_linear",
             "beta_start": 0.00085,
@@ -1279,7 +1303,6 @@ def download_from_original_stable_diffusion_ckpt(
         }
         scheduler = EulerDiscreteScheduler.from_config(scheduler_dict)
         scheduler_type = "euler"
-        vae_path = "stabilityai/sdxl-vae"
     else:
         beta_start = getattr(original_config.model.params, "linear_start", None) or 0.02
         beta_end = getattr(original_config.model.params, "linear_end", None) or 0.085
@@ -1318,25 +1341,45 @@ def download_from_original_stable_diffusion_ckpt(
     # Convert the UNet2DConditionModel model.
     unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
     unet_config["upcast_attention"] = upcast_attention
-    unet = UNet2DConditionModel(**unet_config)
+    with init_empty_weights():
+        unet = UNet2DConditionModel(**unet_config)
 
     converted_unet_checkpoint = convert_ldm_unet_checkpoint(
         checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
     )
-    unet.load_state_dict(converted_unet_checkpoint)
+
+    for param_name, param in converted_unet_checkpoint.items():
+        set_module_tensor_to_device(unet, param_name, "cpu", value=param)
 
     # Convert the VAE model.
     if vae_path is None:
         vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
         converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
 
-        vae = AutoencoderKL(**vae_config)
-        vae.load_state_dict(converted_vae_checkpoint)
+        if (
+            "model" in original_config
+            and "params" in original_config.model
+            and "scale_factor" in original_config.model.params
+        ):
+            vae_scaling_factor = original_config.model.params.scale_factor
+        else:
+            vae_scaling_factor = 0.18215  # default SD scaling factor
+
+        vae_config["scaling_factor"] = vae_scaling_factor
+
+        with init_empty_weights():
+            vae = AutoencoderKL(**vae_config)
+
+        for param_name, param in converted_vae_checkpoint.items():
+            set_module_tensor_to_device(vae, param_name, "cpu", value=param)
     else:
         vae = AutoencoderKL.from_pretrained(vae_path)
 
     if model_type == "FrozenOpenCLIPEmbedder":
-        text_model = convert_open_clip_checkpoint(checkpoint)
+        config_name = "stabilityai/stable-diffusion-2"
+        config_kwargs = {"subfolder": "text_encoder"}
+
+        text_model = convert_open_clip_checkpoint(checkpoint, config_name, **config_kwargs)
         tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer")
 
         if stable_unclip is None:
@@ -1469,7 +1512,12 @@ def download_from_original_stable_diffusion_ckpt(
             tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
             text_encoder = convert_ldm_clip_checkpoint(checkpoint, local_files_only=local_files_only)
             tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
-            text_encoder_2 = convert_open_clip_checkpoint(checkpoint, prefix="conditioner.embedders.1.model.")
+
+            config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+            config_kwargs = {"projection_dim": 1280}
+            text_encoder_2 = convert_open_clip_checkpoint(
+                checkpoint, config_name, prefix="conditioner.embedders.1.model.", has_projection=True, **config_kwargs
+            )
 
             pipe = StableDiffusionXLPipeline(
                 vae=vae,
@@ -1485,7 +1533,12 @@ def download_from_original_stable_diffusion_ckpt(
             tokenizer = None
             text_encoder = None
             tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
-            text_encoder_2 = convert_open_clip_checkpoint(checkpoint, prefix="conditioner.embedders.0.model.")
+
+            config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+            config_kwargs = {"projection_dim": 1280}
+            text_encoder_2 = convert_open_clip_checkpoint(
+                checkpoint, config_name, prefix="conditioner.embedders.0.model.", has_projection=True, **config_kwargs
+            )
 
             pipe = StableDiffusionXLImg2ImgPipeline(
                 vae=vae,
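The recurring pattern in these conversion hunks is loading without a throwaway initialization: the model is built on the meta device via `init_empty_weights()`, then each converted tensor is materialized with `set_module_tensor_to_device`, replacing the old construct-then-`load_state_dict` flow. A minimal sketch of the pattern on a toy module:

    import torch
    from accelerate import init_empty_weights
    from accelerate.utils import set_module_tensor_to_device

    # Build on the meta device: no memory or time is spent on a random init
    # that the checkpoint would immediately overwrite.
    with init_empty_weights():
        model = torch.nn.Linear(4, 4)

    state_dict = {"weight": torch.randn(4, 4), "bias": torch.zeros(4)}

    # Materialize each tensor on CPU directly from the converted state dict.
    for name, value in state_dict.items():
        set_module_tensor_to_device(model, name, "cpu", value=value)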
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
CHANGED
@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ...configuration_utils import FrozenDict
 from ...image_processor import VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor
@@ -153,7 +153,9 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image: bool = False):
     return mask, masked_image
 
 
-class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionInpaintPipeline(
+    DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+):
     r"""
     Pipeline for text-guided image inpainting using Stable Diffusion.
 
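Adding `FromSingleFileMixin` to the bases gives the inpainting pipeline the same `from_single_file` entry point as the text-to-image pipeline. A hedged usage sketch (the checkpoint path is a placeholder):

    from diffusers import StableDiffusionInpaintPipeline

    # Placeholder path; any original-format inpainting checkpoint works here.
    pipe = StableDiffusionInpaintPipeline.from_single_file("./sd-v1-5-inpainting.ckpt")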
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
CHANGED
@@ -748,15 +748,19 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMixin):
         # make sure the VAE is in float32 mode, as it overflows in float16
         self.vae.to(dtype=torch.float32)
 
-        use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
-            AttnProcessor2_0,
-            XFormersAttnProcessor,
-            LoRAXFormersAttnProcessor,
-            LoRAAttnProcessor2_0,
-        ]
+        use_torch_2_0_or_xformers = isinstance(
+            self.vae.decoder.mid_block.attentions[0].processor,
+            (
+                AttnProcessor2_0,
+                XFormersAttnProcessor,
+                LoRAXFormersAttnProcessor,
+                LoRAAttnProcessor2_0,
+            ),
+        )
+
         # if xformers or torch_2_0 is used attention block does not need
         # to be in float32 which can save lots of memory
-        if
+        if use_torch_2_0_or_xformers:
             self.vae.post_quant_conv.to(latents.dtype)
             self.vae.decoder.conv_in.to(latents.dtype)
             self.vae.decoder.mid_block.to(latents.dtype)
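The `isinstance` rewrite fixes a real bug: `processor in [AttnProcessor2_0, ...]` compares the processor instance against the class objects themselves, so the test was always False and the fp16-friendly branch never ran. A standalone demonstration of the difference:

    class AttnProcessor2_0:
        pass

    processor = AttnProcessor2_0()

    # Membership tests equality against the class objects, not the type:
    print(processor in [AttnProcessor2_0])             # False
    print(isinstance(processor, (AttnProcessor2_0,)))  # True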
diffusers/pipelines/stable_diffusion_xl/__init__.py
CHANGED
@@ -8,7 +8,6 @@ from ...utils import BaseOutput, is_invisible_watermark_available, is_torch_available
 
 
 @dataclass
-# Copied from diffusers.pipelines.stable_diffusion.__init__.StableDiffusionPipelineOutput with StableDiffusion->StableDiffusionXL
 class StableDiffusionXLPipelineOutput(BaseOutput):
     """
     Output class for Stable Diffusion pipelines.
@@ -17,13 +16,9 @@ class StableDiffusionXLPipelineOutput(BaseOutput):
         images (`List[PIL.Image.Image]` or `np.ndarray`)
             List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
             num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
-        nsfw_content_detected (`List[bool]`)
-            List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
-            (nsfw) content, or `None` if safety checking could not be performed.
     """
 
     images: Union[List[PIL.Image.Image], np.ndarray]
-    nsfw_content_detected: Optional[List[bool]]
 
 
 if is_transformers_available() and is_torch_available() and is_invisible_watermark_available():
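After this change `StableDiffusionXLPipelineOutput` carries only `images`; there is no `nsfw_content_detected` field to check. A hedged usage sketch (the model id shown is the public SDXL base checkpoint; substitute whichever SDXL weights you use):

    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
    out = pipe(prompt="an astronaut riding a horse")
    image = out.images[0]  # out has no nsfw_content_detected attribute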
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
CHANGED
@@ -129,9 +129,11 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+        self.default_sample_size = self.unet.config.sample_size
 
         self.watermark = StableDiffusionXLWatermarker()
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
     def enable_vae_slicing(self):
         r"""
         Enable sliced VAE decoding.
@@ -141,6 +143,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         """
         self.vae.enable_slicing()
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
     def disable_vae_slicing(self):
         r"""
         Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
@@ -148,6 +151,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         """
         self.vae.disable_slicing()
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
     def enable_vae_tiling(self):
         r"""
         Enable tiled VAE decoding.
@@ -157,6 +161,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         """
         self.vae.enable_tiling()
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
     def disable_vae_tiling(self):
         r"""
         Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
@@ -183,7 +188,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         self.to("cpu", silence_dtype_warnings=True)
         torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
 
-        for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
+        for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
             cpu_offload(cpu_offloaded_model, device)
 
     def enable_model_cpu_offload(self, gpu_id=0):
@@ -217,6 +222,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         self.final_offload_hook = hook
 
     @property
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
     def _execution_device(self):
         r"""
         Returns the device on which the pipeline's models will be executed. After calling
@@ -237,12 +243,14 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
     def encode_prompt(
         self,
         prompt,
-        device,
-        num_images_per_prompt,
-        do_classifier_free_guidance,
+        device: Optional[torch.device] = None,
+        num_images_per_prompt: int = 1,
+        do_classifier_free_guidance: bool = True,
         negative_prompt=None,
         prompt_embeds: Optional[torch.FloatTensor] = None,
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         lora_scale: Optional[float] = None,
     ):
         r"""
@@ -268,9 +276,18 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
+            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+                If not provided, pooled text embeddings will be generated from `prompt` input argument.
+            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+                input argument.
             lora_scale (`float`, *optional*):
                 A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
         """
+        device = device or self._execution_device
+
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
         if lora_scale is not None and isinstance(self, LoraLoaderMixin):
@@ -399,6 +416,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
 
         negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
 
+        bs_embed = pooled_prompt_embeds.shape[0]
         pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
             bs_embed * num_images_per_prompt, -1
         )
@@ -408,20 +426,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
 
         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
-    def run_safety_checker(self, image, device, dtype):
-        if self.safety_checker is None:
-            has_nsfw_concept = None
-        else:
-            if torch.is_tensor(image):
-                feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
-            else:
-                feature_extractor_input = self.image_processor.numpy_to_pil(image)
-            safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
-            image, has_nsfw_concept = self.safety_checker(
-                images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
-            )
-        return image, has_nsfw_concept
-
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
@@ -448,6 +453,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         negative_prompt=None,
         prompt_embeds=None,
         negative_prompt_embeds=None,
+        pooled_prompt_embeds=None,
+        negative_pooled_prompt_embeds=None,
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -486,6 +493,17 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
                 f" {negative_prompt_embeds.shape}."
             )
 
+        if prompt_embeds is not None and pooled_prompt_embeds is None:
+            raise ValueError(
+                "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+            )
+
+        if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+            raise ValueError(
+                "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+            )
+
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
         shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
         if isinstance(generator, list) and len(generator) != batch_size:
@@ -535,6 +553,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         latents: Optional[torch.FloatTensor] = None,
         prompt_embeds: Optional[torch.FloatTensor] = None,
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
@@ -588,6 +608,13 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
+            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+                If not provided, pooled text embeddings will be generated from `prompt` input argument.
+            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+                input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generate image. Choose between
                 [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -626,15 +653,23 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
                 "not-safe-for-work" (nsfw) content, according to the `safety_checker`.
         """
         # 0. Default height and width to unet
-        height = height or self.unet.config.sample_size * self.vae_scale_factor
-        width = width or self.unet.config.sample_size * self.vae_scale_factor
+        height = height or self.default_sample_size * self.vae_scale_factor
+        width = width or self.default_sample_size * self.vae_scale_factor
 
         original_size = original_size or (height, width)
         target_size = target_size or (height, width)
 
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
-            prompt,
+            prompt,
+            height,
+            width,
+            callback_steps,
+            negative_prompt,
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
         )
 
         # 2. Define call parameters
@@ -669,6 +704,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
             negative_prompt,
             prompt_embeds=prompt_embeds,
             negative_prompt_embeds=negative_prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
             lora_scale=text_encoder_lora_scale,
         )
 
@@ -749,15 +786,18 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         # make sure the VAE is in float32 mode, as it overflows in float16
         self.vae.to(dtype=torch.float32)
 
-        use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
-            AttnProcessor2_0,
-            XFormersAttnProcessor,
-            LoRAXFormersAttnProcessor,
-            LoRAAttnProcessor2_0,
-        ]
+        use_torch_2_0_or_xformers = isinstance(
+            self.vae.decoder.mid_block.attentions[0].processor,
+            (
+                AttnProcessor2_0,
+                XFormersAttnProcessor,
+                LoRAXFormersAttnProcessor,
+                LoRAAttnProcessor2_0,
+            ),
+        )
         # if xformers or torch_2_0 is used attention block does not need
         # to be in float32 which can save lots of memory
-        if
+        if use_torch_2_0_or_xformers:
             self.vae.post_quant_conv.to(latents.dtype)
             self.vae.decoder.conv_in.to(latents.dtype)
             self.vae.decoder.mid_block.to(latents.dtype)
@@ -765,27 +805,19 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
         latents = latents.float()
 
         if not output_type == "latent":
-            # CHECK there is problem here (PVP)
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-            has_nsfw_concept = None
         else:
             image = latents
-
-            return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=None)
-
-        if has_nsfw_concept is None:
-            do_denormalize = [True] * image.shape[0]
-        else:
-            do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
+            return StableDiffusionXLPipelineOutput(images=image)
 
         image = self.watermark.apply_watermark(image)
-        image = self.image_processor.postprocess(image, output_type=output_type
+        image = self.image_processor.postprocess(image, output_type=output_type)
 
         # Offload last model to CPU
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.final_offload_hook.offload()
 
         if not return_dict:
-            return (image,
+            return (image,)
 
-        return StableDiffusionXLPipelineOutput(images=image
+        return StableDiffusionXLPipelineOutput(images=image)
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
CHANGED
@@ -140,6 +140,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
 
         self.watermark = StableDiffusionXLWatermarker()
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
     def enable_vae_slicing(self):
         r"""
         Enable sliced VAE decoding.
@@ -149,6 +150,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         """
         self.vae.enable_slicing()
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
     def disable_vae_slicing(self):
         r"""
         Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
@@ -156,6 +158,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         """
         self.vae.disable_slicing()
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
     def enable_vae_tiling(self):
         r"""
         Enable tiled VAE decoding.
@@ -165,6 +168,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         """
         self.vae.enable_tiling()
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
     def disable_vae_tiling(self):
         r"""
         Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
@@ -172,6 +176,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         """
         self.vae.disable_tiling()
 
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_sequential_cpu_offload
     def enable_sequential_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
@@ -191,9 +196,10 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         self.to("cpu", silence_dtype_warnings=True)
         torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
 
-        for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
+        for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
            cpu_offload(cpu_offloaded_model, device)
 
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_model_cpu_offload
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -225,6 +231,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         self.final_offload_hook = hook
 
     @property
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
     def _execution_device(self):
         r"""
         Returns the device on which the pipeline's models will be executed. After calling
@@ -242,15 +249,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
                 return torch.device(module._hf_hook.execution_device)
         return self.device
 
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
         self,
         prompt,
-        device,
-        num_images_per_prompt,
-        do_classifier_free_guidance,
+        device: Optional[torch.device] = None,
+        num_images_per_prompt: int = 1,
+        do_classifier_free_guidance: bool = True,
         negative_prompt=None,
         prompt_embeds: Optional[torch.FloatTensor] = None,
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         lora_scale: Optional[float] = None,
     ):
         r"""
@@ -276,9 +286,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
+            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+                If not provided, pooled text embeddings will be generated from `prompt` input argument.
+            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+                input argument.
             lora_scale (`float`, *optional*):
                 A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
         """
+        device = device or self._execution_device
+
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
         if lora_scale is not None and isinstance(self, LoraLoaderMixin):
@@ -327,13 +346,11 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
                     text_input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
                 pooled_prompt_embeds = prompt_embeds[0]
-
                 prompt_embeds = prompt_embeds.hidden_states[-2]
 
-                prompt_embeds = prompt_embeds
-
         bs_embed, seq_len, _ = prompt_embeds.shape
         # duplicate text embeddings for each generation per prompt, using mps friendly method
         prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
@@ -349,10 +366,9 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
             negative_prompt_embeds = torch.zeros_like(prompt_embeds)
             negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
         elif do_classifier_free_guidance and negative_prompt_embeds is None:
+            negative_prompt = negative_prompt or ""
             uncond_tokens: List[str]
-            if negative_prompt is None:
-                uncond_tokens = [""] * batch_size
-            elif prompt is not None and type(prompt) is not type(negative_prompt):
+            if prompt is not None and type(prompt) is not type(negative_prompt):
                 raise TypeError(
                     f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
                     f" {type(prompt)}."
@@ -389,7 +405,6 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
                 )
                 # We are only ALWAYS interested in the pooled output of the final text encoder
                 negative_pooled_prompt_embeds = negative_prompt_embeds[0]
-
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
 
         if do_classifier_free_guidance:
@@ -411,6 +426,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
 
         negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
 
+        bs_embed = pooled_prompt_embeds.shape[0]
         pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
             bs_embed * num_images_per_prompt, -1
         )
@@ -420,20 +436,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
 
         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
-    def run_safety_checker(self, image, device, dtype):
-        if self.safety_checker is None:
-            has_nsfw_concept = None
-        else:
-            if torch.is_tensor(image):
-                feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
-            else:
-                feature_extractor_input = self.image_processor.numpy_to_pil(image)
-            safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
-            image, has_nsfw_concept = self.safety_checker(
-                images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
-            )
-        return image, has_nsfw_concept
-
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
@@ -624,6 +627,8 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         latents: Optional[torch.FloatTensor] = None,
         prompt_embeds: Optional[torch.FloatTensor] = None,
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
@@ -683,6 +688,13 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
+            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+                If not provided, pooled text embeddings will be generated from `prompt` input argument.
+            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+                input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generate image. Choose between
                 [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -759,6 +771,8 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
             negative_prompt,
             prompt_embeds=prompt_embeds,
             negative_prompt_embeds=negative_prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
             lora_scale=text_encoder_lora_scale,
         )
 
@@ -845,15 +859,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         # make sure the VAE is in float32 mode, as it overflows in float16
         self.vae.to(dtype=torch.float32)
 
-        use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
-            AttnProcessor2_0,
-            XFormersAttnProcessor,
-            LoRAXFormersAttnProcessor,
-            LoRAAttnProcessor2_0,
-        ]
+        use_torch_2_0_or_xformers = isinstance(
+            self.vae.decoder.mid_block.attentions[0].processor,
+            (
+                AttnProcessor2_0,
+                XFormersAttnProcessor,
+                LoRAXFormersAttnProcessor,
+                LoRAAttnProcessor2_0,
+            ),
+        )
         # if xformers or torch_2_0 is used attention block does not need
         # to be in float32 which can save lots of memory
-        if
+        if use_torch_2_0_or_xformers:
             self.vae.post_quant_conv.to(latents.dtype)
             self.vae.decoder.conv_in.to(latents.dtype)
             self.vae.decoder.mid_block.to(latents.dtype)
@@ -862,24 +879,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
 
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-            has_nsfw_concept = None
         else:
             image = latents
-            return StableDiffusionXLPipelineOutput(images=image
-
-        if has_nsfw_concept is None:
-            do_denormalize = [True] * image.shape[0]
-        else:
-            do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
+            return StableDiffusionXLPipelineOutput(images=image)
 
         image = self.watermark.apply_watermark(image)
-        image = self.image_processor.postprocess(image, output_type=output_type
+        image = self.image_processor.postprocess(image, output_type=output_type)
 
         # Offload last model to CPU
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.final_offload_hook.offload()
 
         if not return_dict:
-            return (image,
+            return (image,)
 
-        return StableDiffusionXLPipelineOutput(images=image
+        return StableDiffusionXLPipelineOutput(images=image)
{diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
-diffusers/__init__.py,sha256=
-diffusers/configuration_utils.py,sha256
+diffusers/__init__.py,sha256=Vtoe0ie8nREHRwBNNwzbyQ2rwqLTcB4399y6DBFTOok,9329
+diffusers/configuration_utils.py,sha256=--Nwf_FViQXq71M8PcgUUjT_YoLV1WYqV49Fnk-amkk,30369
 diffusers/dependency_versions_check.py,sha256=T2AQMFfOGMCULAqRAE8zf1VE5j7GFxxs7SfEuhTY4lA,1756
 diffusers/dependency_versions_table.py,sha256=TnzJqBXnJYMXeMw61Lgq_QlTkjWydwOKDIKbV0RXG6Q,1446
 diffusers/image_processor.py,sha256=VqdToqZY-Xdb0sqibwVn1A9gdGOU3OvgQpr67mnMWGg,13700
-diffusers/loaders.py,sha256=
+diffusers/loaders.py,sha256=9trJ4QdgKOmfqguAKHq73fu5VDjw13krtgyJq7AnpQw,75161
 diffusers/optimization.py,sha256=KZpFO98pzgt1l-etti_7k5c-EK9WEY3-XossN6VEGrs,14546
 diffusers/pipeline_utils.py,sha256=dJVuXQ_ZBHkW64dwPbIPM51QnqQKIp9-WSIhRQYlJg4,1147
 diffusers/training_utils.py,sha256=TEuw7ro2RT35ujfMW2DKzb1KZpF4-HfuKSZ1NNnIIvI,13195
@@ -16,7 +16,7 @@ diffusers/experimental/rl/value_guided_sampling.py,sha256=iIhf1gc2QP7Jx4HrsoOyRC
 diffusers/models/__init__.py,sha256=MDG83d8C1YGGSnGNwi9sG6c33_FEaMGS3BVGnaqWJqQ,1446
 diffusers/models/activations.py,sha256=cWe7qw4wR626ADw-abcV3lI1v5Vim_R_eNMc5jPlaLo,297
 diffusers/models/attention.py,sha256=Nfmze9IvGR5a6ir9o0Z4DbAQ8repJxBo2_t4fDsnvHw,15197
-diffusers/models/attention_flax.py,sha256=
+diffusers/models/attention_flax.py,sha256=6IOINRK5flDgnzsLiSLIfhBnDtdY9LyhcDIUXVS_Gag,18142
 diffusers/models/attention_processor.py,sha256=04g9405fWhb-C0xO9cnn-LfAMcSwxZ9fOzYrX98aa6A,70119
 diffusers/models/autoencoder_kl.py,sha256=qM2oRqJROHvA3PSwMDmNISQzK3oFmgJiRRzvHZw9dHQ,17913
 diffusers/models/controlnet.py,sha256=OzCVtpmlJXTfIze3Bmc6p7lGFirxvlI-MroHL7HQ5mQ,33086
@@ -28,7 +28,7 @@ diffusers/models/embeddings_flax.py,sha256=87ysODCdTERpYfH-EDhElOUyCAu8z6-xIQCqL
 diffusers/models/modeling_flax_pytorch_utils.py,sha256=yFQHU86DdvrzFLfkTbyZZ0_PWKrjnp08s46dD-wf_tw,4601
 diffusers/models/modeling_flax_utils.py,sha256=0ailGzoCLU5-81rn048e2UJEr0S1lHGBQGqpOJzWfWQ,26071
 diffusers/models/modeling_pytorch_flax_utils.py,sha256=5dt6mC956MYrIMp8Owvx8QQv8xsfik6vu0frgb_c6HE,6974
-diffusers/models/modeling_utils.py,sha256=
+diffusers/models/modeling_utils.py,sha256=vHf-AWIwuTvyjtOCbTryupWmQLxiujNhBfVL0hmop_k,46588
 diffusers/models/prior_transformer.py,sha256=5A8Tgq4VXkjH0ib05kPHXPObekLYdrRwuCgnGvoMVN4,16574
 diffusers/models/resnet.py,sha256=y9FIuXYUTHYA3AFUeDBwiHJVu0crM0fMRnzEJ3ZtVf4,35294
 diffusers/models/resnet_flax.py,sha256=VKF-ti1jlH_GnlWRy9dY6ETc-W9ZitfQoNjmrFAQxuU,4021
@@ -50,7 +50,7 @@ diffusers/models/vq_model.py,sha256=_98GsNUGg3HxcC97zQSgxEPVuDNvn1DcJP6TCTpGLVE,
 diffusers/pipelines/__init__.py,sha256=pjJh4SXSHjSBtzzAsiuQp64YQ03xPMdgTzK-0-iV9Ew,7009
 diffusers/pipelines/onnx_utils.py,sha256=M-6GBVRFji_ik5x1CMxrz9r5oEBr9TTblqLsI1HfiS4,8282
 diffusers/pipelines/pipeline_flax_utils.py,sha256=CLjAhcwfBJ1xTbdRbyWHGdcd5uRJDoXDdxruuK2t2iM,25924
-diffusers/pipelines/pipeline_utils.py,sha256=
+diffusers/pipelines/pipeline_utils.py,sha256=2P6oTVvZcs33-LoWUQosYkdsl1bEKE3MfnQdhkjubRw,72464
 diffusers/pipelines/alt_diffusion/__init__.py,sha256=rCOBtGQ7xi3DahUXY8r5ICt_t6S0ogp4uDJL9q4avso,1346
 diffusers/pipelines/alt_diffusion/modeling_roberta_series.py,sha256=_UC4IxHAg2QAFtw4yCvo2eLIDBRmg2JvvtOr6k5PFC8,5580
 diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py,sha256=YgUvsnah_cIXNwzJgxE87fftWox7leXOY8lzZeph7c8,40641
|
@@ -127,7 +127,7 @@ diffusers/pipelines/spectrogram_diffusion/midi_utils.py,sha256=HmOSMSaKZlloW8J6m
|
|
127
127
|
diffusers/pipelines/spectrogram_diffusion/notes_encoder.py,sha256=Yq3W0lkAMGhx5pGklTvomBHjqR1nAVALBcYlzZBSQ90,2921
|
128
128
|
diffusers/pipelines/spectrogram_diffusion/pipeline_spectrogram_diffusion.py,sha256=GAHovdMWMhmGgS02kFOaS7_Lq9AJmTxrBZC0VElHwBQ,8657
|
129
129
|
diffusers/pipelines/stable_diffusion/__init__.py,sha256=nBYUiO6TbCsqNfImNCPi1aE-Q35Lc5r9B7qWb9TDjcM,6164
|
130
|
-
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=
|
130
|
+
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=wYHT2MGLa6LFcmlvtxgDCQ5tqZCqejur2hN-0YL0GsA,69501
|
131
131
|
diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py,sha256=sA76ZiTUVOTiCMDss7z3nouqg8czJwBmhX7OPuYheWk,43554
|
132
132
|
diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py,sha256=Pbprq5sXlbS6JPP44eOzzm0FrwsccrHoaXuFWY_Kx38,20922
|
133
133
|
diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_controlnet.py,sha256=rjxVeyAmMTQiTiQF7Q9y3BYU45jCImKZGx37ir8zpM8,1257
|
@@ -145,7 +145,7 @@ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py,sha2
|
|
145
145
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py,sha256=-k3M22p4KXDkUJAnxj4xU9VM_QBBqs_pV0XHlfYzsKk,80921
|
146
146
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py,sha256=xOvGZvBBvB1Ee_9B46bKpRK5C9SuxbNZhrQ44nvIYsQ,23137
|
147
147
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py,sha256=XPNN7lINZoezjS2ciifKfIvRXotUbB_dyByaAPOE3Vs,42149
|
148
|
-
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=
|
148
|
+
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=0Q9m9yJ5rED3U1vyjmrHVft9jIqB35KkfDerc_VYJNg,55140
|
149
149
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py,sha256=gzklmzg25O3y48wNubrl_jzKwzZlgiperjyV6OqdvxU,42148
|
150
150
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py,sha256=g8yEk161s1CO8NDVm6ZTMdONtilXDIKQWI6dMnRk6Bs,40919
|
151
151
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py,sha256=UM-T14SgwlIk9is3QDQx7IsTWU-_ZsD1iA4rCO9-3Rk,31679
|
@@ -156,7 +156,7 @@ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py,sha25
|
|
156
156
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py,sha256=cnwmjjTQA5lMd-pK-MhnwLF8bK-pwZu0S3xiHLO2WFk,43302
|
157
157
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py,sha256=_EtteuRKzWruht97EpQO7zrqoyhGyZUUgDFuw04mW5M,62705
|
158
158
|
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py,sha256=xiBT7l8nha5HLg2MEeOYozUwmNIUZzbW1mjgt6b7JU8,40491
|
159
|
-
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=
|
159
|
+
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=TOJge0fcAt10pg-hJdjhQdMi53pBzBrcx7x6vIGMZVc,38610
|
160
160
|
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py,sha256=ef5W_IynHqocHCWJcX32MUxRPvShH3spsv0RA_lBlTk,46256
|
161
161
|
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py,sha256=YSwcDOwL1sE6ItdYm1ZuYb3uZVAf-DzuhtndV_Auqzw,40305
|
162
162
|
diffusers/pipelines/stable_diffusion/safety_checker.py,sha256=zLs3meGi6JiRYlHntPiBEaU9_JjYcZnzrPa5picFiG4,5734
|
@@ -165,9 +165,9 @@ diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py,sha256=RE
|
|
165
165
|
diffusers/pipelines/stable_diffusion_safe/__init__.py,sha256=FAuvPLSYCLDzJ1d2GntTwQXpxgABEaoLrj5LdQOtxpA,2502
|
166
166
|
diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py,sha256=yZMsvIdDAhF7maJOpc9UWSIUbWpIghOypzgo4-vAI0A,37886
|
167
167
|
diffusers/pipelines/stable_diffusion_safe/safety_checker.py,sha256=lEXvS-_WCcVpje14hoajJG2Z4jlWs0UsID3IqWTnOys,5049
|
168
|
-
diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256
|
169
|
-
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=
|
170
|
-
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=
|
168
|
+
diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=-RFjtUQxnCEPSF0Foq90HWIjyHblHOH0eHoNg1dqj68,953
|
169
|
+
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=MTG8Ym65rS3vvALVHWkHHP5cEKMcjpSnYHZuh_lwKcU,41570
|
170
|
+
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=USKMxXkFUeDJQr77Ecw1QlJieiet37KdQB53wWqpCc4,46168
|
171
171
|
diffusers/pipelines/stable_diffusion_xl/watermark.py,sha256=22Pg7TXApd4oRBvyJDh5B5L6--Zj7hKaYj8dHSTsGzQ,1142
|
172
172
|
diffusers/pipelines/stochastic_karras_ve/__init__.py,sha256=StxEhuNuCeEY3qv3ZIcBfXsaxDH3JmWeuHx1xCHnYRI,60
|
173
173
|
diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py,sha256=zZn4jZ8iHJSsoMvStG3l4WvQ6wAtKjK0LDjLRQA3PLU,5669
|
@@ -254,9 +254,9 @@ diffusers/utils/outputs.py,sha256=l5RdKO6SRnnz7fsXsmmnkOyCf_0z35kwfkDbnhCFeAc,36
|
|
254
254
|
diffusers/utils/pil_utils.py,sha256=F7M3QWYQyRcLNsS8876wgKqOnhzg8hNTPHQy6Q-jYj0,1423
|
255
255
|
diffusers/utils/testing_utils.py,sha256=TiKwlhR4SvEIIkAOrF11qYNg27p_tVp0ifJgEW2mNAk,21197
|
256
256
|
diffusers/utils/torch_utils.py,sha256=4gRMtlH81IrbYh_pfR0ZkDNbuxmVX03fmR6xrDTZIP0,3378
|
257
|
-
diffusers-0.18.
|
258
|
-
diffusers-0.18.
|
259
|
-
diffusers-0.18.
|
260
|
-
diffusers-0.18.
|
261
|
-
diffusers-0.18.
|
262
|
-
diffusers-0.18.
|
257
|
+
diffusers-0.18.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
258
|
+
diffusers-0.18.2.dist-info/METADATA,sha256=iO5QVnb_Ri2SVl5YaXfVn7P6JsyQJ2PiTcQ5aNNubxY,17540
|
259
|
+
diffusers-0.18.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
260
|
+
diffusers-0.18.2.dist-info/entry_points.txt,sha256=VULXr1th-UU5J0Ou_l0If6E4CY4HSSiMElweZ58u9H0,73
|
261
|
+
diffusers-0.18.2.dist-info/top_level.txt,sha256=axJl2884vMSvhzrFrSoht36QXA_6gZN9cKtg4xOO72o,10
|
262
|
+
diffusers-0.18.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|