diffusers 0.15.1__py3-none-any.whl → 0.16.1__py3-none-any.whl
- diffusers/__init__.py +7 -2
- diffusers/configuration_utils.py +4 -0
- diffusers/loaders.py +262 -12
- diffusers/models/attention.py +31 -12
- diffusers/models/attention_processor.py +189 -0
- diffusers/models/controlnet.py +9 -2
- diffusers/models/embeddings.py +66 -0
- diffusers/models/modeling_pytorch_flax_utils.py +6 -0
- diffusers/models/modeling_utils.py +5 -2
- diffusers/models/transformer_2d.py +1 -1
- diffusers/models/unet_2d_condition.py +45 -6
- diffusers/models/vae.py +3 -0
- diffusers/pipelines/__init__.py +8 -0
- diffusers/pipelines/alt_diffusion/modeling_roberta_series.py +25 -10
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +8 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +8 -0
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/deepfloyd_if/__init__.py +54 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +854 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +979 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1097 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1098 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1208 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +947 -0
- diffusers/pipelines/deepfloyd_if/safety_checker.py +59 -0
- diffusers/pipelines/deepfloyd_if/timesteps.py +579 -0
- diffusers/pipelines/deepfloyd_if/watermark.py +46 -0
- diffusers/pipelines/pipeline_utils.py +54 -25
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +37 -20
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_controlnet.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +12 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_controlnet.py +59 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +22 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +34 -30
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +93 -10
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +45 -6
- diffusers/schedulers/scheduling_ddpm.py +63 -16
- diffusers/schedulers/scheduling_heun_discrete.py +51 -1
- diffusers/utils/__init__.py +4 -1
- diffusers/utils/dummy_torch_and_transformers_objects.py +80 -5
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/hub_utils.py +4 -1
- diffusers/utils/import_utils.py +41 -0
- diffusers/utils/pil_utils.py +24 -0
- diffusers/utils/testing_utils.py +10 -0
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/METADATA +1 -1
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/RECORD +57 -47
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/LICENSE +0 -0
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/WHEEL +0 -0
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/top_level.txt +0 -0
diffusers/schedulers/scheduling_ddpm.py
CHANGED
@@ -162,6 +162,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         self.init_noise_sigma = 1.0
 
         # setable values
+        self.custom_timesteps = False
         self.num_inference_steps = None
         self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy())
 
@@ -181,31 +182,62 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         """
         return sample
 
-    def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
+    def set_timesteps(
+        self,
+        num_inference_steps: Optional[int] = None,
+        device: Union[str, torch.device] = None,
+        timesteps: Optional[List[int]] = None,
+    ):
         """
         Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
 
         Args:
-            num_inference_steps (`int`):
-                the number of diffusion steps used when generating samples with a pre-trained model.
+            num_inference_steps (`Optional[int]`):
+                the number of diffusion steps used when generating samples with a pre-trained model. If passed, then
+                `timesteps` must be `None`.
+            device (`str` or `torch.device`, optional):
+                the device to which the timesteps are moved to.
+            custom_timesteps (`List[int]`, optional):
+                custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
+                timestep spacing strategy of equal spacing between timesteps is used. If passed, `num_inference_steps`
+                must be `None`.
+
         """
+        if num_inference_steps is not None and timesteps is not None:
+            raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.")
+
+        if timesteps is not None:
+            for i in range(1, len(timesteps)):
+                if timesteps[i] >= timesteps[i - 1]:
+                    raise ValueError("`custom_timesteps` must be in descending order.")
+
+            if timesteps[0] >= self.config.num_train_timesteps:
+                raise ValueError(
+                    f"`timesteps` must start before `self.config.train_timesteps`:"
+                    f" {self.config.num_train_timesteps}."
+                )
+
+            timesteps = np.array(timesteps, dtype=np.int64)
+            self.custom_timesteps = True
+        else:
+            if num_inference_steps > self.config.num_train_timesteps:
+                raise ValueError(
+                    f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
+                    f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
+                    f" maximal {self.config.num_train_timesteps} timesteps."
+                )
 
-        if num_inference_steps > self.config.num_train_timesteps:
-            raise ValueError(
-                f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
-                f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
-                f" maximal {self.config.num_train_timesteps} timesteps."
-            )
+            self.num_inference_steps = num_inference_steps
 
-        self.num_inference_steps = num_inference_steps
+            step_ratio = self.config.num_train_timesteps // self.num_inference_steps
+            timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64)
+            self.custom_timesteps = False
 
-        step_ratio = self.config.num_train_timesteps // self.num_inference_steps
-        timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64)
         self.timesteps = torch.from_numpy(timesteps).to(device)
 
     def _get_variance(self, t, predicted_variance=None, variance_type=None):
-        num_inference_steps = self.num_inference_steps if self.num_inference_steps else self.config.num_train_timesteps
-        prev_t = t - self.config.num_train_timesteps // num_inference_steps
+        prev_t = self.previous_timestep(t)
+
         alpha_prod_t = self.alphas_cumprod[t]
         alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
         current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev
@@ -304,8 +336,8 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
 
         """
        t = timestep
-        num_inference_steps = self.num_inference_steps if self.num_inference_steps else self.config.num_train_timesteps
-        prev_t = t - self.config.num_train_timesteps // num_inference_steps
+
+        prev_t = self.previous_timestep(t)
 
         if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]:
             model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
@@ -418,3 +450,18 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
 
     def __len__(self):
         return self.config.num_train_timesteps
+
+    def previous_timestep(self, timestep):
+        if self.custom_timesteps:
+            index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
+            if index == self.timesteps.shape[0] - 1:
+                prev_t = torch.tensor(-1)
+            else:
+                prev_t = self.timesteps[index + 1]
+        else:
+            num_inference_steps = (
+                self.num_inference_steps if self.num_inference_steps else self.config.num_train_timesteps
+            )
+            prev_t = timestep - self.config.num_train_timesteps // num_inference_steps
+
+        return prev_t
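
Taken together, these hunks give DDPMScheduler a second way to define its schedule: instead of `num_inference_steps`, callers can pass an explicit, strictly descending `timesteps` list, and `previous_timestep` then walks that list rather than subtracting a fixed step ratio. A minimal usage sketch (not part of the diff; the checkpoint id and the timestep values are illustrative):

    from diffusers import DDPMScheduler

    scheduler = DDPMScheduler.from_pretrained("google/ddpm-cat-256")

    # Old path: 50 equally spaced steps; custom_timesteps stays False.
    scheduler.set_timesteps(num_inference_steps=50)

    # New in 0.16: arbitrary spacing via an explicit descending list.
    scheduler.set_timesteps(timesteps=[999, 750, 500, 250, 100, 10])
    assert scheduler.custom_timesteps
    print(scheduler.previous_timestep(750))  # tensor(500), the next list entry

Passing both arguments at once raises the `ValueError` shown above.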
diffusers/schedulers/scheduling_heun_discrete.py
CHANGED
@@ -75,7 +75,11 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         prediction_type (`str`, default `epsilon`, optional):
             prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
             process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
-            https://imagen.research.google/video/paper.pdf)
+            https://imagen.research.google/video/paper.pdf).
+        use_karras_sigmas (`bool`, *optional*, defaults to `False`):
+            This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
+            noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
+            of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
     """
 
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -90,6 +94,7 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         beta_schedule: str = "linear",
         trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
         prediction_type: str = "epsilon",
+        use_karras_sigmas: Optional[bool] = False,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -111,6 +116,7 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
 
         # set all values
         self.set_timesteps(num_train_timesteps, None, num_train_timesteps)
+        self.use_karras_sigmas = use_karras_sigmas
 
     def index_for_timestep(self, timestep, schedule_timesteps=None):
         if schedule_timesteps is None:
@@ -165,7 +171,13 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
 
         sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
+        log_sigmas = np.log(sigmas)
         sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
+
+        if self.use_karras_sigmas:
+            sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+            timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+
         sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
         sigmas = torch.from_numpy(sigmas).to(device=device)
         self.sigmas = torch.cat([sigmas[:1], sigmas[1:-1].repeat_interleave(2), sigmas[-1:]])
@@ -186,6 +198,44 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.prev_derivative = None
         self.dt = None
 
+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
+    def _sigma_to_t(self, sigma, log_sigmas):
+        # get log sigma
+        log_sigma = np.log(sigma)
+
+        # get distribution
+        dists = log_sigma - log_sigmas[:, np.newaxis]
+
+        # get sigmas range
+        low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2)
+        high_idx = low_idx + 1
+
+        low = log_sigmas[low_idx]
+        high = log_sigmas[high_idx]
+
+        # interpolate sigmas
+        w = (low - log_sigma) / (low - high)
+        w = np.clip(w, 0, 1)
+
+        # transform interpolation to time range
+        t = (1 - w) * low_idx + w * high_idx
+        t = t.reshape(sigma.shape)
+        return t
+
+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
+    def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
+        """Constructs the noise schedule of Karras et al. (2022)."""
+
+        sigma_min: float = in_sigmas[-1].item()
+        sigma_max: float = in_sigmas[0].item()
+
+        rho = 7.0  # 7.0 is the value used in the paper
+        ramp = np.linspace(0, 1, num_inference_steps)
+        min_inv_rho = sigma_min ** (1 / rho)
+        max_inv_rho = sigma_max ** (1 / rho)
+        sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
+        return sigmas
+
     @property
     def state_in_first_order(self):
         return self.dt is None
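
`_convert_to_karras` is Equation (5) of Karras et al. (2022) with rho = 7: interpolate linearly between sigma_max**(1/rho) and sigma_min**(1/rho), then raise back to the rho-th power, which spaces steps densely near the low-noise end. A standalone sketch of the math with illustrative values (not part of the diff):

    import numpy as np

    sigma_min, sigma_max, rho, n = 0.1, 10.0, 7.0, 5
    ramp = np.linspace(0, 1, n)
    min_inv_rho, max_inv_rho = sigma_min ** (1 / rho), sigma_max ** (1 / rho)
    sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
    print(sigmas)  # descends from 10.0 to 0.1, clustered near 0.1

To opt in on an existing pipeline, the scheduler can be rebuilt with the new flag, e.g. `HeunDiscreteScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)`, where `pipe` is any loaded pipeline.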
diffusers/utils/__init__.py
CHANGED
@@ -44,6 +44,7 @@ from .hub_utils import (
     http_user_agent,
 )
 from .import_utils import (
+    BACKENDS_MAPPING,
     ENV_VARS_TRUE_AND_AUTO_VALUES,
     ENV_VARS_TRUE_VALUES,
     USE_JAX,
@@ -53,7 +54,9 @@ from .import_utils import (
     OptionalDependencyNotAvailable,
     is_accelerate_available,
     is_accelerate_version,
+    is_bs4_available,
     is_flax_available,
+    is_ftfy_available,
     is_inflect_available,
     is_k_diffusion_available,
     is_k_diffusion_version,
@@ -76,7 +79,7 @@ from .import_utils import (
 )
 from .logging import get_logger
 from .outputs import BaseOutput
-from .pil_utils import PIL_INTERPOLATION
+from .pil_utils import PIL_INTERPOLATION, numpy_to_pil, pt_to_pil
 from .torch_utils import is_compiled_module, randn_tensor
 
 
diffusers/utils/dummy_torch_and_transformers_objects.py
CHANGED
@@ -2,7 +2,7 @@
 from ..utils import DummyObject, requires_backends
 
 
-class TextualInversionLoaderMixin(metaclass=DummyObject):
+class AltDiffusionImg2ImgPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]
 
     def __init__(self, *args, **kwargs):
@@ -17,7 +17,7 @@ class TextualInversionLoaderMixin(metaclass=DummyObject):
         requires_backends(cls, ["torch", "transformers"])
 
 
-class AltDiffusionImg2ImgPipeline(metaclass=DummyObject):
+class AltDiffusionPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]
 
     def __init__(self, *args, **kwargs):
@@ -32,7 +32,7 @@ class AltDiffusionImg2ImgPipeline(metaclass=DummyObject):
         requires_backends(cls, ["torch", "transformers"])
 
 
-class AltDiffusionPipeline(metaclass=DummyObject):
+class AudioLDMPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]
 
     def __init__(self, *args, **kwargs):
@@ -47,7 +47,7 @@ class AltDiffusionPipeline(metaclass=DummyObject):
         requires_backends(cls, ["torch", "transformers"])
 
 
-class AudioLDMPipeline(metaclass=DummyObject):
+class CycleDiffusionPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]
 
     def __init__(self, *args, **kwargs):
@@ -62,7 +62,82 @@ class AudioLDMPipeline(metaclass=DummyObject):
         requires_backends(cls, ["torch", "transformers"])
 
 
-class CycleDiffusionPipeline(metaclass=DummyObject):
+class IFImg2ImgPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
+class IFImg2ImgSuperResolutionPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
+class IFInpaintingPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
+class IFInpaintingSuperResolutionPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
+class IFPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
+class IFSuperResolutionPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]
 
     def __init__(self, *args, **kwargs):
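
All of these generated dummies share one behavior: the `DummyObject` metaclass lets the class name be imported even when torch or transformers is missing, and constructing or loading one routes through `requires_backends`, which raises an `ImportError` built from the templates in `BACKENDS_MAPPING`. A sketch of the expected failure mode (hypothetical session, backends assumed absent):

    # In an environment without torch/transformers installed:
    from diffusers import IFPipeline  # succeeds, resolves to the dummy class
    IFPipeline()                      # raises ImportError naming torch and transformers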
diffusers/utils/dynamic_modules_utils.py
CHANGED
@@ -267,7 +267,7 @@ def get_cached_module_file(
 
     # retrieve github version that matches
     if revision is None:
-        revision = latest_version if latest_version in available_versions else "main"
+        revision = latest_version if latest_version[1:] in available_versions else "main"
         logger.info(f"Defaulting to latest_version: {revision}.")
     elif revision in available_versions:
         revision = f"v{revision}"
diffusers/utils/hub_utils.py
CHANGED
@@ -199,7 +199,10 @@ if not os.path.isfile(cache_version_file):
     cache_version = 0
 else:
     with open(cache_version_file) as f:
-        cache_version = int(f.read())
+        try:
+            cache_version = int(f.read())
+        except ValueError:
+            cache_version = 0
 
 if cache_version < 1:
     old_cache_is_not_empty = os.path.isdir(old_diffusers_cache) and len(os.listdir(old_diffusers_cache)) > 0
diffusers/utils/import_utils.py
CHANGED
@@ -271,6 +271,23 @@ except importlib_metadata.PackageNotFoundError:
     _compel_available = False
 
 
+_ftfy_available = importlib.util.find_spec("ftfy") is not None
+try:
+    _ftfy_version = importlib_metadata.version("ftfy")
+    logger.debug(f"Successfully imported ftfy version {_ftfy_version}")
+except importlib_metadata.PackageNotFoundError:
+    _ftfy_available = False
+
+
+_bs4_available = importlib.util.find_spec("bs4") is not None
+try:
+    # importlib metadata under different name
+    _bs4_version = importlib_metadata.version("beautifulsoup4")
+    logger.debug(f"Successfully imported ftfy version {_bs4_version}")
+except importlib_metadata.PackageNotFoundError:
+    _bs4_available = False
+
+
 def is_torch_available():
     return _torch_available
 
@@ -347,6 +364,14 @@ def is_compel_available():
     return _compel_available
 
 
+def is_ftfy_available():
+    return _ftfy_available
+
+
+def is_bs4_available():
+    return _bs4_available
+
+
 # docstyle-ignore
 FLAX_IMPORT_ERROR = """
 {0} requires the FLAX library but it was not found in your environment. Checkout the instructions on the
@@ -437,8 +462,23 @@ COMPEL_IMPORT_ERROR = """
 {0} requires the compel library but it was not found in your environment. You can install it with pip: `pip install compel`
 """
 
+# docstyle-ignore
+BS4_IMPORT_ERROR = """
+{0} requires the Beautiful Soup library but it was not found in your environment. You can install it with pip:
+`pip install beautifulsoup4`. Please note that you may need to restart your runtime after installation.
+"""
+
+# docstyle-ignore
+FTFY_IMPORT_ERROR = """
+{0} requires the ftfy library but it was not found in your environment. Checkout the instructions on the
+installation section: https://github.com/rspeer/python-ftfy/tree/master#installing and follow the ones
+that match your environment. Please note that you may need to restart your runtime after installation.
+"""
+
+
 BACKENDS_MAPPING = OrderedDict(
     [
+        ("bs4", (is_bs4_available, BS4_IMPORT_ERROR)),
         ("flax", (is_flax_available, FLAX_IMPORT_ERROR)),
         ("inflect", (is_inflect_available, INFLECT_IMPORT_ERROR)),
         ("onnx", (is_onnx_available, ONNX_IMPORT_ERROR)),
@@ -454,6 +494,7 @@ BACKENDS_MAPPING = OrderedDict(
         ("omegaconf", (is_omegaconf_available, OMEGACONF_IMPORT_ERROR)),
         ("tensorboard", (_tensorboard_available, TENSORBOARD_IMPORT_ERROR)),
         ("compel", (_compel_available, COMPEL_IMPORT_ERROR)),
+        ("ftfy", (is_ftfy_available, FTFY_IMPORT_ERROR)),
     ]
 )
 
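
Each entry in the mapping pairs an availability check with the matching error template, which `requires_backends` formats with the name of the component that needs the backend. A small sketch of how the new entries are consumed (`MyComponent` is a placeholder name):

    from diffusers.utils import BACKENDS_MAPPING, is_bs4_available

    if not is_bs4_available():
        checker, template = BACKENDS_MAPPING["bs4"]
        print(template.format("MyComponent"))  # install hint for beautifulsoup4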
diffusers/utils/pil_utils.py
CHANGED
@@ -1,6 +1,7 @@
 import PIL.Image
 import PIL.ImageOps
 from packaging import version
+from PIL import Image
 
 
 if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
@@ -19,3 +20,26 @@ else:
         "lanczos": PIL.Image.LANCZOS,
         "nearest": PIL.Image.NEAREST,
     }
+
+
+def pt_to_pil(images):
+    images = (images / 2 + 0.5).clamp(0, 1)
+    images = images.cpu().permute(0, 2, 3, 1).float().numpy()
+    images = numpy_to_pil(images)
+    return images
+
+
+def numpy_to_pil(images):
+    """
+    Convert a numpy image or a batch of images to a PIL image.
+    """
+    if images.ndim == 3:
+        images = images[None, ...]
+    images = (images * 255).round().astype("uint8")
+    if images.shape[-1] == 1:
+        # special case for grayscale (single channel) images
+        pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images]
+    else:
+        pil_images = [Image.fromarray(image) for image in images]
+
+    return pil_images
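
`pt_to_pil` assumes the usual diffusion convention of NCHW tensors in [-1, 1]: it rescales to [0, 1], moves channels last, and hands off to `numpy_to_pil`; both helpers are now re-exported from `diffusers.utils` (see the `__init__.py` hunk above). A quick sketch:

    import torch
    from diffusers.utils import pt_to_pil

    images = torch.rand(2, 3, 64, 64) * 2 - 1   # NCHW batch in [-1, 1]
    pil_images = pt_to_pil(images)
    print(len(pil_images), pil_images[0].size)  # 2 (64, 64)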
diffusers/utils/testing_utils.py
CHANGED
@@ -279,6 +279,16 @@ def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image:
     return image
 
 
+def preprocess_image(image: PIL.Image, batch_size: int):
+    w, h = image.size
+    w, h = (x - x % 8 for x in (w, h))  # resize to integer multiple of 8
+    image = image.resize((w, h), resample=PIL.Image.LANCZOS)
+    image = np.array(image).astype(np.float32) / 255.0
+    image = np.vstack([image[None].transpose(0, 3, 1, 2)] * batch_size)
+    image = torch.from_numpy(image)
+    return 2.0 * image - 1.0
+
+
 def export_to_video(video_frames: List[np.ndarray], output_video_path: str = None) -> str:
     if is_opencv_available():
         import cv2
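
`preprocess_image` packages the img2img test convention: floor each side to a multiple of 8, scale to [0, 1], tile to a batch, and map into [-1, 1]. A quick sketch:

    import PIL.Image
    from diffusers.utils.testing_utils import preprocess_image

    image = PIL.Image.new("RGB", (513, 517))  # sides get floored to 512
    batch = preprocess_image(image, batch_size=2)
    print(batch.shape)  # torch.Size([2, 3, 512, 512])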