diffusers 0.15.1__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +7 -2
- diffusers/configuration_utils.py +4 -0
- diffusers/loaders.py +262 -12
- diffusers/models/attention.py +31 -12
- diffusers/models/attention_processor.py +189 -0
- diffusers/models/controlnet.py +9 -2
- diffusers/models/embeddings.py +66 -0
- diffusers/models/modeling_pytorch_flax_utils.py +6 -0
- diffusers/models/modeling_utils.py +5 -2
- diffusers/models/transformer_2d.py +1 -1
- diffusers/models/unet_2d_condition.py +45 -6
- diffusers/models/vae.py +3 -0
- diffusers/pipelines/__init__.py +8 -0
- diffusers/pipelines/alt_diffusion/modeling_roberta_series.py +25 -10
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +8 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +8 -0
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/deepfloyd_if/__init__.py +54 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +854 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +979 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1097 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1098 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1208 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +947 -0
- diffusers/pipelines/deepfloyd_if/safety_checker.py +59 -0
- diffusers/pipelines/deepfloyd_if/timesteps.py +579 -0
- diffusers/pipelines/deepfloyd_if/watermark.py +46 -0
- diffusers/pipelines/pipeline_utils.py +54 -25
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +37 -20
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_controlnet.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +12 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_controlnet.py +59 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +22 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +34 -30
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +93 -10
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +45 -6
- diffusers/schedulers/scheduling_ddpm.py +63 -16
- diffusers/schedulers/scheduling_heun_discrete.py +51 -1
- diffusers/utils/__init__.py +4 -1
- diffusers/utils/dummy_torch_and_transformers_objects.py +80 -5
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/hub_utils.py +4 -1
- diffusers/utils/import_utils.py +41 -0
- diffusers/utils/pil_utils.py +24 -0
- diffusers/utils/testing_utils.py +10 -0
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/METADATA +1 -1
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/RECORD +57 -47
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/LICENSE +0 -0
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/WHEEL +0 -0
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.15.1.dist-info → diffusers-0.16.1.dist-info}/top_level.txt +0 -0
diffusers/schedulers/scheduling_ddpm.py
CHANGED
@@ -162,6 +162,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
162
162
|
self.init_noise_sigma = 1.0
|
163
163
|
|
164
164
|
# setable values
|
165
|
+
self.custom_timesteps = False
|
165
166
|
self.num_inference_steps = None
|
166
167
|
self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy())
|
167
168
|
|
@@ -181,31 +182,62 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
181
182
|
"""
|
182
183
|
return sample
|
183
184
|
|
184
|
-
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
185
|
+
def set_timesteps(
|
186
|
+
self,
|
187
|
+
num_inference_steps: Optional[int] = None,
|
188
|
+
device: Union[str, torch.device] = None,
|
189
|
+
timesteps: Optional[List[int]] = None,
|
190
|
+
):
|
185
191
|
"""
|
186
192
|
Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
|
187
193
|
|
188
194
|
Args:
|
189
|
-
num_inference_steps (`int`):
|
190
|
-
the number of diffusion steps used when generating samples with a pre-trained model.
|
195
|
+
num_inference_steps (`Optional[int]`):
|
196
|
+
the number of diffusion steps used when generating samples with a pre-trained model. If passed, then
|
197
|
+
`timesteps` must be `None`.
|
198
|
+
device (`str` or `torch.device`, optional):
|
199
|
+
the device to which the timesteps are moved to.
|
200
|
+
custom_timesteps (`List[int]`, optional):
|
201
|
+
custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
|
202
|
+
timestep spacing strategy of equal spacing between timesteps is used. If passed, `num_inference_steps`
|
203
|
+
must be `None`.
|
204
|
+
|
191
205
|
"""
|
206
|
+
if num_inference_steps is not None and timesteps is not None:
|
207
|
+
raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.")
|
208
|
+
|
209
|
+
if timesteps is not None:
|
210
|
+
for i in range(1, len(timesteps)):
|
211
|
+
if timesteps[i] >= timesteps[i - 1]:
|
212
|
+
raise ValueError("`custom_timesteps` must be in descending order.")
|
213
|
+
|
214
|
+
if timesteps[0] >= self.config.num_train_timesteps:
|
215
|
+
raise ValueError(
|
216
|
+
f"`timesteps` must start before `self.config.train_timesteps`:"
|
217
|
+
f" {self.config.num_train_timesteps}."
|
218
|
+
)
|
219
|
+
|
220
|
+
timesteps = np.array(timesteps, dtype=np.int64)
|
221
|
+
self.custom_timesteps = True
|
222
|
+
else:
|
223
|
+
if num_inference_steps > self.config.num_train_timesteps:
|
224
|
+
raise ValueError(
|
225
|
+
f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
|
226
|
+
f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
|
227
|
+
f" maximal {self.config.num_train_timesteps} timesteps."
|
228
|
+
)
|
192
229
|
|
193
|
-
|
194
|
-
raise ValueError(
|
195
|
-
f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
|
196
|
-
f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
|
197
|
-
f" maximal {self.config.num_train_timesteps} timesteps."
|
198
|
-
)
|
230
|
+
self.num_inference_steps = num_inference_steps
|
199
231
|
|
200
|
-
|
232
|
+
step_ratio = self.config.num_train_timesteps // self.num_inference_steps
|
233
|
+
timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64)
|
234
|
+
self.custom_timesteps = False
|
201
235
|
|
202
|
-
step_ratio = self.config.num_train_timesteps // self.num_inference_steps
|
203
|
-
timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64)
|
204
236
|
self.timesteps = torch.from_numpy(timesteps).to(device)
|
205
237
|
|
206
238
|
def _get_variance(self, t, predicted_variance=None, variance_type=None):
|
207
|
-
|
208
|
-
|
239
|
+
prev_t = self.previous_timestep(t)
|
240
|
+
|
209
241
|
alpha_prod_t = self.alphas_cumprod[t]
|
210
242
|
alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
|
211
243
|
current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev
|
@@ -304,8 +336,8 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
304
336
|
|
305
337
|
"""
|
306
338
|
t = timestep
|
307
|
-
|
308
|
-
prev_t = timestep - self.config.num_train_timesteps // num_inference_steps
|
339
|
+
|
340
|
+
prev_t = self.previous_timestep(t)
|
309
341
|
|
310
342
|
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]:
|
311
343
|
model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
|
@@ -418,3 +450,18 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
418
450
|
|
419
451
|
def __len__(self):
|
420
452
|
return self.config.num_train_timesteps
|
453
|
+
|
454
|
+
def previous_timestep(self, timestep):
|
455
|
+
if self.custom_timesteps:
|
456
|
+
index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
|
457
|
+
if index == self.timesteps.shape[0] - 1:
|
458
|
+
prev_t = torch.tensor(-1)
|
459
|
+
else:
|
460
|
+
prev_t = self.timesteps[index + 1]
|
461
|
+
else:
|
462
|
+
num_inference_steps = (
|
463
|
+
self.num_inference_steps if self.num_inference_steps else self.config.num_train_timesteps
|
464
|
+
)
|
465
|
+
prev_t = timestep - self.config.num_train_timesteps // num_inference_steps
|
466
|
+
|
467
|
+
return prev_t
|
diffusers/schedulers/scheduling_heun_discrete.py
CHANGED
@@ -75,7 +75,11 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
75
75
|
prediction_type (`str`, default `epsilon`, optional):
|
76
76
|
prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
|
77
77
|
process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
|
78
|
-
https://imagen.research.google/video/paper.pdf)
|
78
|
+
https://imagen.research.google/video/paper.pdf).
|
79
|
+
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
80
|
+
This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
|
81
|
+
noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
|
82
|
+
of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
|
79
83
|
"""
|
80
84
|
|
81
85
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -90,6 +94,7 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
90
94
|
beta_schedule: str = "linear",
|
91
95
|
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
92
96
|
prediction_type: str = "epsilon",
|
97
|
+
use_karras_sigmas: Optional[bool] = False,
|
93
98
|
):
|
94
99
|
if trained_betas is not None:
|
95
100
|
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
@@ -111,6 +116,7 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
111
116
|
|
112
117
|
# set all values
|
113
118
|
self.set_timesteps(num_train_timesteps, None, num_train_timesteps)
|
119
|
+
self.use_karras_sigmas = use_karras_sigmas
|
114
120
|
|
115
121
|
def index_for_timestep(self, timestep, schedule_timesteps=None):
|
116
122
|
if schedule_timesteps is None:
|
@@ -165,7 +171,13 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
165
171
|
timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
|
166
172
|
|
167
173
|
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
174
|
+
log_sigmas = np.log(sigmas)
|
168
175
|
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
176
|
+
|
177
|
+
if self.use_karras_sigmas:
|
178
|
+
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
179
|
+
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
180
|
+
|
169
181
|
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
|
170
182
|
sigmas = torch.from_numpy(sigmas).to(device=device)
|
171
183
|
self.sigmas = torch.cat([sigmas[:1], sigmas[1:-1].repeat_interleave(2), sigmas[-1:]])
|
@@ -186,6 +198,44 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
186
198
|
self.prev_derivative = None
|
187
199
|
self.dt = None
|
188
200
|
|
201
|
+
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
|
202
|
+
def _sigma_to_t(self, sigma, log_sigmas):
|
203
|
+
# get log sigma
|
204
|
+
log_sigma = np.log(sigma)
|
205
|
+
|
206
|
+
# get distribution
|
207
|
+
dists = log_sigma - log_sigmas[:, np.newaxis]
|
208
|
+
|
209
|
+
# get sigmas range
|
210
|
+
low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2)
|
211
|
+
high_idx = low_idx + 1
|
212
|
+
|
213
|
+
low = log_sigmas[low_idx]
|
214
|
+
high = log_sigmas[high_idx]
|
215
|
+
|
216
|
+
# interpolate sigmas
|
217
|
+
w = (low - log_sigma) / (low - high)
|
218
|
+
w = np.clip(w, 0, 1)
|
219
|
+
|
220
|
+
# transform interpolation to time range
|
221
|
+
t = (1 - w) * low_idx + w * high_idx
|
222
|
+
t = t.reshape(sigma.shape)
|
223
|
+
return t
|
224
|
+
|
225
|
+
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
|
226
|
+
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
227
|
+
"""Constructs the noise schedule of Karras et al. (2022)."""
|
228
|
+
|
229
|
+
sigma_min: float = in_sigmas[-1].item()
|
230
|
+
sigma_max: float = in_sigmas[0].item()
|
231
|
+
|
232
|
+
rho = 7.0 # 7.0 is the value used in the paper
|
233
|
+
ramp = np.linspace(0, 1, num_inference_steps)
|
234
|
+
min_inv_rho = sigma_min ** (1 / rho)
|
235
|
+
max_inv_rho = sigma_max ** (1 / rho)
|
236
|
+
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
237
|
+
return sigmas
|
238
|
+
|
189
239
|
@property
|
190
240
|
def state_in_first_order(self):
|
191
241
|
return self.dt is None
|
diffusers/utils/__init__.py
CHANGED
@@ -44,6 +44,7 @@ from .hub_utils import (
|
|
44
44
|
http_user_agent,
|
45
45
|
)
|
46
46
|
from .import_utils import (
|
47
|
+
BACKENDS_MAPPING,
|
47
48
|
ENV_VARS_TRUE_AND_AUTO_VALUES,
|
48
49
|
ENV_VARS_TRUE_VALUES,
|
49
50
|
USE_JAX,
|
@@ -53,7 +54,9 @@ from .import_utils import (
|
|
53
54
|
OptionalDependencyNotAvailable,
|
54
55
|
is_accelerate_available,
|
55
56
|
is_accelerate_version,
|
57
|
+
is_bs4_available,
|
56
58
|
is_flax_available,
|
59
|
+
is_ftfy_available,
|
57
60
|
is_inflect_available,
|
58
61
|
is_k_diffusion_available,
|
59
62
|
is_k_diffusion_version,
|
@@ -76,7 +79,7 @@ from .import_utils import (
|
|
76
79
|
)
|
77
80
|
from .logging import get_logger
|
78
81
|
from .outputs import BaseOutput
|
79
|
-
from .pil_utils import PIL_INTERPOLATION
|
82
|
+
from .pil_utils import PIL_INTERPOLATION, numpy_to_pil, pt_to_pil
|
80
83
|
from .torch_utils import is_compiled_module, randn_tensor
|
81
84
|
|
82
85
|
|
diffusers/utils/dummy_torch_and_transformers_objects.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
from ..utils import DummyObject, requires_backends
|
3
3
|
|
4
4
|
|
5
|
-
class TextualInversionLoaderMixin(metaclass=DummyObject):
|
5
|
+
class AltDiffusionImg2ImgPipeline(metaclass=DummyObject):
|
6
6
|
_backends = ["torch", "transformers"]
|
7
7
|
|
8
8
|
def __init__(self, *args, **kwargs):
|
@@ -17,7 +17,7 @@ class TextualInversionLoaderMixin(metaclass=DummyObject):
|
|
17
17
|
requires_backends(cls, ["torch", "transformers"])
|
18
18
|
|
19
19
|
|
20
|
-
class AltDiffusionImg2ImgPipeline(metaclass=DummyObject):
|
20
|
+
class AltDiffusionPipeline(metaclass=DummyObject):
|
21
21
|
_backends = ["torch", "transformers"]
|
22
22
|
|
23
23
|
def __init__(self, *args, **kwargs):
|
@@ -32,7 +32,7 @@ class AltDiffusionImg2ImgPipeline(metaclass=DummyObject):
|
|
32
32
|
requires_backends(cls, ["torch", "transformers"])
|
33
33
|
|
34
34
|
|
35
|
-
class AltDiffusionPipeline(metaclass=DummyObject):
|
35
|
+
class AudioLDMPipeline(metaclass=DummyObject):
|
36
36
|
_backends = ["torch", "transformers"]
|
37
37
|
|
38
38
|
def __init__(self, *args, **kwargs):
|
@@ -47,7 +47,7 @@ class AltDiffusionPipeline(metaclass=DummyObject):
|
|
47
47
|
requires_backends(cls, ["torch", "transformers"])
|
48
48
|
|
49
49
|
|
50
|
-
class AudioLDMPipeline(metaclass=DummyObject):
|
50
|
+
class CycleDiffusionPipeline(metaclass=DummyObject):
|
51
51
|
_backends = ["torch", "transformers"]
|
52
52
|
|
53
53
|
def __init__(self, *args, **kwargs):
|
@@ -62,7 +62,82 @@ class AudioLDMPipeline(metaclass=DummyObject):
|
|
62
62
|
requires_backends(cls, ["torch", "transformers"])
|
63
63
|
|
64
64
|
|
65
|
-
class CycleDiffusionPipeline(metaclass=DummyObject):
|
65
|
+
class IFImg2ImgPipeline(metaclass=DummyObject):
|
66
|
+
_backends = ["torch", "transformers"]
|
67
|
+
|
68
|
+
def __init__(self, *args, **kwargs):
|
69
|
+
requires_backends(self, ["torch", "transformers"])
|
70
|
+
|
71
|
+
@classmethod
|
72
|
+
def from_config(cls, *args, **kwargs):
|
73
|
+
requires_backends(cls, ["torch", "transformers"])
|
74
|
+
|
75
|
+
@classmethod
|
76
|
+
def from_pretrained(cls, *args, **kwargs):
|
77
|
+
requires_backends(cls, ["torch", "transformers"])
|
78
|
+
|
79
|
+
|
80
|
+
class IFImg2ImgSuperResolutionPipeline(metaclass=DummyObject):
|
81
|
+
_backends = ["torch", "transformers"]
|
82
|
+
|
83
|
+
def __init__(self, *args, **kwargs):
|
84
|
+
requires_backends(self, ["torch", "transformers"])
|
85
|
+
|
86
|
+
@classmethod
|
87
|
+
def from_config(cls, *args, **kwargs):
|
88
|
+
requires_backends(cls, ["torch", "transformers"])
|
89
|
+
|
90
|
+
@classmethod
|
91
|
+
def from_pretrained(cls, *args, **kwargs):
|
92
|
+
requires_backends(cls, ["torch", "transformers"])
|
93
|
+
|
94
|
+
|
95
|
+
class IFInpaintingPipeline(metaclass=DummyObject):
|
96
|
+
_backends = ["torch", "transformers"]
|
97
|
+
|
98
|
+
def __init__(self, *args, **kwargs):
|
99
|
+
requires_backends(self, ["torch", "transformers"])
|
100
|
+
|
101
|
+
@classmethod
|
102
|
+
def from_config(cls, *args, **kwargs):
|
103
|
+
requires_backends(cls, ["torch", "transformers"])
|
104
|
+
|
105
|
+
@classmethod
|
106
|
+
def from_pretrained(cls, *args, **kwargs):
|
107
|
+
requires_backends(cls, ["torch", "transformers"])
|
108
|
+
|
109
|
+
|
110
|
+
class IFInpaintingSuperResolutionPipeline(metaclass=DummyObject):
|
111
|
+
_backends = ["torch", "transformers"]
|
112
|
+
|
113
|
+
def __init__(self, *args, **kwargs):
|
114
|
+
requires_backends(self, ["torch", "transformers"])
|
115
|
+
|
116
|
+
@classmethod
|
117
|
+
def from_config(cls, *args, **kwargs):
|
118
|
+
requires_backends(cls, ["torch", "transformers"])
|
119
|
+
|
120
|
+
@classmethod
|
121
|
+
def from_pretrained(cls, *args, **kwargs):
|
122
|
+
requires_backends(cls, ["torch", "transformers"])
|
123
|
+
|
124
|
+
|
125
|
+
class IFPipeline(metaclass=DummyObject):
|
126
|
+
_backends = ["torch", "transformers"]
|
127
|
+
|
128
|
+
def __init__(self, *args, **kwargs):
|
129
|
+
requires_backends(self, ["torch", "transformers"])
|
130
|
+
|
131
|
+
@classmethod
|
132
|
+
def from_config(cls, *args, **kwargs):
|
133
|
+
requires_backends(cls, ["torch", "transformers"])
|
134
|
+
|
135
|
+
@classmethod
|
136
|
+
def from_pretrained(cls, *args, **kwargs):
|
137
|
+
requires_backends(cls, ["torch", "transformers"])
|
138
|
+
|
139
|
+
|
140
|
+
class IFSuperResolutionPipeline(metaclass=DummyObject):
|
66
141
|
_backends = ["torch", "transformers"]
|
67
142
|
|
68
143
|
def __init__(self, *args, **kwargs):
|
diffusers/utils/dynamic_modules_utils.py
CHANGED
@@ -267,7 +267,7 @@ def get_cached_module_file(
|
|
267
267
|
|
268
268
|
# retrieve github version that matches
|
269
269
|
if revision is None:
|
270
|
-
revision = latest_version if latest_version in available_versions else "main"
|
270
|
+
revision = latest_version if latest_version[1:] in available_versions else "main"
|
271
271
|
logger.info(f"Defaulting to latest_version: {revision}.")
|
272
272
|
elif revision in available_versions:
|
273
273
|
revision = f"v{revision}"
|
diffusers/utils/hub_utils.py
CHANGED
@@ -199,7 +199,10 @@ if not os.path.isfile(cache_version_file):
|
|
199
199
|
cache_version = 0
|
200
200
|
else:
|
201
201
|
with open(cache_version_file) as f:
|
202
|
-
|
202
|
+
try:
|
203
|
+
cache_version = int(f.read())
|
204
|
+
except ValueError:
|
205
|
+
cache_version = 0
|
203
206
|
|
204
207
|
if cache_version < 1:
|
205
208
|
old_cache_is_not_empty = os.path.isdir(old_diffusers_cache) and len(os.listdir(old_diffusers_cache)) > 0
|
diffusers/utils/import_utils.py
CHANGED
@@ -271,6 +271,23 @@ except importlib_metadata.PackageNotFoundError:
|
|
271
271
|
_compel_available = False
|
272
272
|
|
273
273
|
|
274
|
+
_ftfy_available = importlib.util.find_spec("ftfy") is not None
|
275
|
+
try:
|
276
|
+
_ftfy_version = importlib_metadata.version("ftfy")
|
277
|
+
logger.debug(f"Successfully imported ftfy version {_ftfy_version}")
|
278
|
+
except importlib_metadata.PackageNotFoundError:
|
279
|
+
_ftfy_available = False
|
280
|
+
|
281
|
+
|
282
|
+
_bs4_available = importlib.util.find_spec("bs4") is not None
|
283
|
+
try:
|
284
|
+
# importlib metadata under different name
|
285
|
+
_bs4_version = importlib_metadata.version("beautifulsoup4")
|
286
|
+
logger.debug(f"Successfully imported ftfy version {_bs4_version}")
|
287
|
+
except importlib_metadata.PackageNotFoundError:
|
288
|
+
_bs4_available = False
|
289
|
+
|
290
|
+
|
274
291
|
def is_torch_available():
|
275
292
|
return _torch_available
|
276
293
|
|
@@ -347,6 +364,14 @@ def is_compel_available():
|
|
347
364
|
return _compel_available
|
348
365
|
|
349
366
|
|
367
|
+
def is_ftfy_available():
|
368
|
+
return _ftfy_available
|
369
|
+
|
370
|
+
|
371
|
+
def is_bs4_available():
|
372
|
+
return _bs4_available
|
373
|
+
|
374
|
+
|
350
375
|
# docstyle-ignore
|
351
376
|
FLAX_IMPORT_ERROR = """
|
352
377
|
{0} requires the FLAX library but it was not found in your environment. Checkout the instructions on the
|
@@ -437,8 +462,23 @@ COMPEL_IMPORT_ERROR = """
|
|
437
462
|
{0} requires the compel library but it was not found in your environment. You can install it with pip: `pip install compel`
|
438
463
|
"""
|
439
464
|
|
465
|
+
# docstyle-ignore
|
466
|
+
BS4_IMPORT_ERROR = """
|
467
|
+
{0} requires the Beautiful Soup library but it was not found in your environment. You can install it with pip:
|
468
|
+
`pip install beautifulsoup4`. Please note that you may need to restart your runtime after installation.
|
469
|
+
"""
|
470
|
+
|
471
|
+
# docstyle-ignore
|
472
|
+
FTFY_IMPORT_ERROR = """
|
473
|
+
{0} requires the ftfy library but it was not found in your environment. Checkout the instructions on the
|
474
|
+
installation section: https://github.com/rspeer/python-ftfy/tree/master#installing and follow the ones
|
475
|
+
that match your environment. Please note that you may need to restart your runtime after installation.
|
476
|
+
"""
|
477
|
+
|
478
|
+
|
440
479
|
BACKENDS_MAPPING = OrderedDict(
|
441
480
|
[
|
481
|
+
("bs4", (is_bs4_available, BS4_IMPORT_ERROR)),
|
442
482
|
("flax", (is_flax_available, FLAX_IMPORT_ERROR)),
|
443
483
|
("inflect", (is_inflect_available, INFLECT_IMPORT_ERROR)),
|
444
484
|
("onnx", (is_onnx_available, ONNX_IMPORT_ERROR)),
|
@@ -454,6 +494,7 @@ BACKENDS_MAPPING = OrderedDict(
|
|
454
494
|
("omegaconf", (is_omegaconf_available, OMEGACONF_IMPORT_ERROR)),
|
455
495
|
("tensorboard", (_tensorboard_available, TENSORBOARD_IMPORT_ERROR)),
|
456
496
|
("compel", (_compel_available, COMPEL_IMPORT_ERROR)),
|
497
|
+
("ftfy", (is_ftfy_available, FTFY_IMPORT_ERROR)),
|
457
498
|
]
|
458
499
|
)
|
459
500
|
|
diffusers/utils/pil_utils.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import PIL.Image
|
2
2
|
import PIL.ImageOps
|
3
3
|
from packaging import version
|
4
|
+
from PIL import Image
|
4
5
|
|
5
6
|
|
6
7
|
if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
|
@@ -19,3 +20,26 @@ else:
|
|
19
20
|
"lanczos": PIL.Image.LANCZOS,
|
20
21
|
"nearest": PIL.Image.NEAREST,
|
21
22
|
}
|
23
|
+
|
24
|
+
|
25
|
+
def pt_to_pil(images):
|
26
|
+
images = (images / 2 + 0.5).clamp(0, 1)
|
27
|
+
images = images.cpu().permute(0, 2, 3, 1).float().numpy()
|
28
|
+
images = numpy_to_pil(images)
|
29
|
+
return images
|
30
|
+
|
31
|
+
|
32
|
+
def numpy_to_pil(images):
|
33
|
+
"""
|
34
|
+
Convert a numpy image or a batch of images to a PIL image.
|
35
|
+
"""
|
36
|
+
if images.ndim == 3:
|
37
|
+
images = images[None, ...]
|
38
|
+
images = (images * 255).round().astype("uint8")
|
39
|
+
if images.shape[-1] == 1:
|
40
|
+
# special case for grayscale (single channel) images
|
41
|
+
pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images]
|
42
|
+
else:
|
43
|
+
pil_images = [Image.fromarray(image) for image in images]
|
44
|
+
|
45
|
+
return pil_images
|
diffusers/utils/testing_utils.py
CHANGED
@@ -279,6 +279,16 @@ def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image:
|
|
279
279
|
return image
|
280
280
|
|
281
281
|
|
282
|
+
def preprocess_image(image: PIL.Image, batch_size: int):
|
283
|
+
w, h = image.size
|
284
|
+
w, h = (x - x % 8 for x in (w, h)) # resize to integer multiple of 8
|
285
|
+
image = image.resize((w, h), resample=PIL.Image.LANCZOS)
|
286
|
+
image = np.array(image).astype(np.float32) / 255.0
|
287
|
+
image = np.vstack([image[None].transpose(0, 3, 1, 2)] * batch_size)
|
288
|
+
image = torch.from_numpy(image)
|
289
|
+
return 2.0 * image - 1.0
|
290
|
+
|
291
|
+
|
282
292
|
def export_to_video(video_frames: List[np.ndarray], output_video_path: str = None) -> str:
|
283
293
|
if is_opencv_available():
|
284
294
|
import cv2
|