diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- diffusers/__init__.py +26 -1
- diffusers/configuration_utils.py +34 -29
- diffusers/dependency_versions_table.py +4 -0
- diffusers/image_processor.py +125 -12
- diffusers/loaders.py +169 -203
- diffusers/models/attention.py +24 -1
- diffusers/models/attention_flax.py +10 -5
- diffusers/models/attention_processor.py +3 -0
- diffusers/models/autoencoder_kl.py +114 -33
- diffusers/models/controlnet.py +131 -14
- diffusers/models/controlnet_flax.py +37 -26
- diffusers/models/cross_attention.py +17 -17
- diffusers/models/embeddings.py +67 -0
- diffusers/models/modeling_flax_utils.py +64 -56
- diffusers/models/modeling_utils.py +193 -104
- diffusers/models/prior_transformer.py +207 -37
- diffusers/models/resnet.py +26 -26
- diffusers/models/transformer_2d.py +36 -41
- diffusers/models/transformer_temporal.py +24 -21
- diffusers/models/unet_1d.py +31 -25
- diffusers/models/unet_2d.py +43 -30
- diffusers/models/unet_2d_blocks.py +210 -89
- diffusers/models/unet_2d_blocks_flax.py +12 -12
- diffusers/models/unet_2d_condition.py +172 -64
- diffusers/models/unet_2d_condition_flax.py +38 -24
- diffusers/models/unet_3d_blocks.py +34 -31
- diffusers/models/unet_3d_condition.py +101 -34
- diffusers/models/vae.py +5 -5
- diffusers/models/vae_flax.py +37 -34
- diffusers/models/vq_model.py +23 -14
- diffusers/pipelines/__init__.py +24 -1
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
- diffusers/pipelines/consistency_models/__init__.py +1 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
- diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/kandinsky/__init__.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
- diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
- diffusers/pipelines/pipeline_flax_utils.py +2 -2
- diffusers/pipelines/pipeline_utils.py +124 -146
- diffusers/pipelines/shap_e/__init__.py +27 -0
- diffusers/pipelines/shap_e/camera.py +147 -0
- diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
- diffusers/pipelines/shap_e/renderer.py +709 -0
- diffusers/pipelines/stable_diffusion/__init__.py +2 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
- diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
- diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
- diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
- diffusers/schedulers/__init__.py +3 -0
- diffusers/schedulers/scheduling_consistency_models.py +380 -0
- diffusers/schedulers/scheduling_ddim.py +28 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
- diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
- diffusers/schedulers/scheduling_ddpm.py +53 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
- diffusers/schedulers/scheduling_deis_multistep.py +66 -11
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
- diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
- diffusers/schedulers/scheduling_euler_discrete.py +58 -8
- diffusers/schedulers/scheduling_heun_discrete.py +89 -14
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
- diffusers/schedulers/scheduling_lms_discrete.py +57 -8
- diffusers/schedulers/scheduling_pndm.py +46 -10
- diffusers/schedulers/scheduling_repaint.py +19 -4
- diffusers/schedulers/scheduling_sde_ve.py +5 -1
- diffusers/schedulers/scheduling_unclip.py +43 -4
- diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
- diffusers/training_utils.py +1 -1
- diffusers/utils/__init__.py +2 -1
- diffusers/utils/dummy_pt_objects.py +60 -0
- diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
- diffusers/utils/hub_utils.py +1 -1
- diffusers/utils/import_utils.py +20 -3
- diffusers/utils/logging.py +15 -18
- diffusers/utils/outputs.py +3 -3
- diffusers/utils/testing_utils.py +15 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
@@ -45,7 +45,11 @@ class LMSDiscreteSchedulerOutput(BaseOutput):
|
|
45
45
|
|
46
46
|
|
47
47
|
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
|
48
|
-
def betas_for_alpha_bar(
|
48
|
+
def betas_for_alpha_bar(
|
49
|
+
num_diffusion_timesteps,
|
50
|
+
max_beta=0.999,
|
51
|
+
alpha_transform_type="cosine",
|
52
|
+
):
|
49
53
|
"""
|
50
54
|
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
51
55
|
(1-beta) over time from t = [0,1].
|
@@ -58,19 +62,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
|
|
58
62
|
num_diffusion_timesteps (`int`): the number of betas to produce.
|
59
63
|
max_beta (`float`): the maximum beta to use; use values lower than 1 to
|
60
64
|
prevent singularities.
|
65
|
+
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
|
66
|
+
Choose from `cosine` or `exp`
|
61
67
|
|
62
68
|
Returns:
|
63
69
|
betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
|
64
70
|
"""
|
71
|
+
if alpha_transform_type == "cosine":
|
65
72
|
|
66
|
-
|
67
|
-
|
73
|
+
def alpha_bar_fn(t):
|
74
|
+
return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
|
75
|
+
|
76
|
+
elif alpha_transform_type == "exp":
|
77
|
+
|
78
|
+
def alpha_bar_fn(t):
|
79
|
+
return math.exp(t * -12.0)
|
80
|
+
|
81
|
+
else:
|
82
|
+
raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
|
68
83
|
|
69
84
|
betas = []
|
70
85
|
for i in range(num_diffusion_timesteps):
|
71
86
|
t1 = i / num_diffusion_timesteps
|
72
87
|
t2 = (i + 1) / num_diffusion_timesteps
|
73
|
-
betas.append(min(1 -
|
88
|
+
betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
|
74
89
|
return torch.tensor(betas, dtype=torch.float32)
|
75
90
|
|
76
91
|
|
@@ -102,6 +117,13 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
102
117
|
prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
|
103
118
|
process), `sample` (directly predicting the noisy sample) or `v_prediction` (see section 2.4
|
104
119
|
https://imagen.research.google/video/paper.pdf)
|
120
|
+
timestep_spacing (`str`, default `"linspace"`):
|
121
|
+
The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
|
122
|
+
Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
|
123
|
+
steps_offset (`int`, default `0`):
|
124
|
+
an offset added to the inference steps. You can use a combination of `offset=1` and
|
125
|
+
`set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
|
126
|
+
stable diffusion.
|
105
127
|
"""
|
106
128
|
|
107
129
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -117,6 +139,8 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
117
139
|
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
118
140
|
use_karras_sigmas: Optional[bool] = False,
|
119
141
|
prediction_type: str = "epsilon",
|
142
|
+
timestep_spacing: str = "linspace",
|
143
|
+
steps_offset: int = 0,
|
120
144
|
):
|
121
145
|
if trained_betas is not None:
|
122
146
|
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
@@ -140,9 +164,6 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
140
164
|
sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
|
141
165
|
self.sigmas = torch.from_numpy(sigmas)
|
142
166
|
|
143
|
-
# standard deviation of the initial noise distribution
|
144
|
-
self.init_noise_sigma = self.sigmas.max()
|
145
|
-
|
146
167
|
# setable values
|
147
168
|
self.num_inference_steps = None
|
148
169
|
self.use_karras_sigmas = use_karras_sigmas
|
@@ -150,6 +171,14 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
150
171
|
self.derivatives = []
|
151
172
|
self.is_scale_input_called = False
|
152
173
|
|
174
|
+
@property
|
175
|
+
def init_noise_sigma(self):
|
176
|
+
# standard deviation of the initial noise distribution
|
177
|
+
if self.config.timestep_spacing in ["linspace", "trailing"]:
|
178
|
+
return self.sigmas.max()
|
179
|
+
|
180
|
+
return (self.sigmas.max() ** 2 + 1) ** 0.5
|
181
|
+
|
153
182
|
def scale_model_input(
|
154
183
|
self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
|
155
184
|
) -> torch.FloatTensor:
|
@@ -205,7 +234,27 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
205
234
|
"""
|
206
235
|
self.num_inference_steps = num_inference_steps
|
207
236
|
|
208
|
-
|
237
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
|
238
|
+
if self.config.timestep_spacing == "linspace":
|
239
|
+
timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[
|
240
|
+
::-1
|
241
|
+
].copy()
|
242
|
+
elif self.config.timestep_spacing == "leading":
|
243
|
+
step_ratio = self.config.num_train_timesteps // self.num_inference_steps
|
244
|
+
# creates integer timesteps by multiplying by ratio
|
245
|
+
# casting to int to avoid issues when num_inference_steps is a power of 3
|
246
|
+
timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float)
|
247
|
+
timesteps += self.config.steps_offset
|
248
|
+
elif self.config.timestep_spacing == "trailing":
|
249
|
+
step_ratio = self.config.num_train_timesteps / self.num_inference_steps
|
250
|
+
# creates integer timesteps by multiplying by ratio
|
251
|
+
# casting to int to avoid issues when num_inference_steps is a power of 3
|
252
|
+
timesteps = (np.arange(self.config.num_train_timesteps, 0, -step_ratio)).round().copy().astype(float)
|
253
|
+
timesteps -= 1
|
254
|
+
else:
|
255
|
+
raise ValueError(
|
256
|
+
f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
|
257
|
+
)
|
209
258
|
|
210
259
|
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
211
260
|
log_sigmas = np.log(sigmas)
|
@@ -25,7 +25,11 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
|
|
25
25
|
|
26
26
|
|
27
27
|
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
|
28
|
-
def betas_for_alpha_bar(
|
28
|
+
def betas_for_alpha_bar(
|
29
|
+
num_diffusion_timesteps,
|
30
|
+
max_beta=0.999,
|
31
|
+
alpha_transform_type="cosine",
|
32
|
+
):
|
29
33
|
"""
|
30
34
|
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
31
35
|
(1-beta) over time from t = [0,1].
|
@@ -38,19 +42,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
|
|
38
42
|
num_diffusion_timesteps (`int`): the number of betas to produce.
|
39
43
|
max_beta (`float`): the maximum beta to use; use values lower than 1 to
|
40
44
|
prevent singularities.
|
45
|
+
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
|
46
|
+
Choose from `cosine` or `exp`
|
41
47
|
|
42
48
|
Returns:
|
43
49
|
betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
|
44
50
|
"""
|
51
|
+
if alpha_transform_type == "cosine":
|
45
52
|
|
46
|
-
|
47
|
-
|
53
|
+
def alpha_bar_fn(t):
|
54
|
+
return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
|
55
|
+
|
56
|
+
elif alpha_transform_type == "exp":
|
57
|
+
|
58
|
+
def alpha_bar_fn(t):
|
59
|
+
return math.exp(t * -12.0)
|
60
|
+
|
61
|
+
else:
|
62
|
+
raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
|
48
63
|
|
49
64
|
betas = []
|
50
65
|
for i in range(num_diffusion_timesteps):
|
51
66
|
t1 = i / num_diffusion_timesteps
|
52
67
|
t2 = (i + 1) / num_diffusion_timesteps
|
53
|
-
betas.append(min(1 -
|
68
|
+
betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
|
54
69
|
return torch.tensor(betas, dtype=torch.float32)
|
55
70
|
|
56
71
|
|
@@ -85,11 +100,13 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
85
100
|
prediction_type (`str`, default `epsilon`, optional):
|
86
101
|
prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion process)
|
87
102
|
or `v_prediction` (see section 2.4 https://imagen.research.google/video/paper.pdf)
|
103
|
+
timestep_spacing (`str`, default `"leading"`):
|
104
|
+
The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
|
105
|
+
Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
|
88
106
|
steps_offset (`int`, default `0`):
|
89
107
|
an offset added to the inference steps. You can use a combination of `offset=1` and
|
90
108
|
`set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
|
91
109
|
stable diffusion.
|
92
|
-
|
93
110
|
"""
|
94
111
|
|
95
112
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -106,6 +123,7 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
106
123
|
skip_prk_steps: bool = False,
|
107
124
|
set_alpha_to_one: bool = False,
|
108
125
|
prediction_type: str = "epsilon",
|
126
|
+
timestep_spacing: str = "leading",
|
109
127
|
steps_offset: int = 0,
|
110
128
|
):
|
111
129
|
if trained_betas is not None:
|
@@ -159,11 +177,29 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
159
177
|
"""
|
160
178
|
|
161
179
|
self.num_inference_steps = num_inference_steps
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
180
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
|
181
|
+
if self.config.timestep_spacing == "linspace":
|
182
|
+
self._timesteps = (
|
183
|
+
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps).round().astype(np.int64)
|
184
|
+
)
|
185
|
+
elif self.config.timestep_spacing == "leading":
|
186
|
+
step_ratio = self.config.num_train_timesteps // self.num_inference_steps
|
187
|
+
# creates integer timesteps by multiplying by ratio
|
188
|
+
# casting to int to avoid issues when num_inference_steps is a power of 3
|
189
|
+
self._timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()
|
190
|
+
self._timesteps += self.config.steps_offset
|
191
|
+
elif self.config.timestep_spacing == "trailing":
|
192
|
+
step_ratio = self.config.num_train_timesteps / self.num_inference_steps
|
193
|
+
# creates integer timesteps by multiplying by ratio
|
194
|
+
# casting to int to avoid issues when num_inference_steps is a power of 3
|
195
|
+
self._timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio))[::-1].astype(
|
196
|
+
np.int64
|
197
|
+
)
|
198
|
+
self._timesteps -= 1
|
199
|
+
else:
|
200
|
+
raise ValueError(
|
201
|
+
f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
|
202
|
+
)
|
167
203
|
|
168
204
|
if self.config.skip_prk_steps:
|
169
205
|
# for some models like stable diffusion the prk steps can/should be skipped to
|
@@ -43,7 +43,11 @@ class RePaintSchedulerOutput(BaseOutput):
|
|
43
43
|
|
44
44
|
|
45
45
|
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
|
46
|
-
def betas_for_alpha_bar(
|
46
|
+
def betas_for_alpha_bar(
|
47
|
+
num_diffusion_timesteps,
|
48
|
+
max_beta=0.999,
|
49
|
+
alpha_transform_type="cosine",
|
50
|
+
):
|
47
51
|
"""
|
48
52
|
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
49
53
|
(1-beta) over time from t = [0,1].
|
@@ -56,19 +60,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
|
|
56
60
|
num_diffusion_timesteps (`int`): the number of betas to produce.
|
57
61
|
max_beta (`float`): the maximum beta to use; use values lower than 1 to
|
58
62
|
prevent singularities.
|
63
|
+
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
|
64
|
+
Choose from `cosine` or `exp`
|
59
65
|
|
60
66
|
Returns:
|
61
67
|
betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
|
62
68
|
"""
|
69
|
+
if alpha_transform_type == "cosine":
|
63
70
|
|
64
|
-
|
65
|
-
|
71
|
+
def alpha_bar_fn(t):
|
72
|
+
return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
|
73
|
+
|
74
|
+
elif alpha_transform_type == "exp":
|
75
|
+
|
76
|
+
def alpha_bar_fn(t):
|
77
|
+
return math.exp(t * -12.0)
|
78
|
+
|
79
|
+
else:
|
80
|
+
raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
|
66
81
|
|
67
82
|
betas = []
|
68
83
|
for i in range(num_diffusion_timesteps):
|
69
84
|
t1 = i / num_diffusion_timesteps
|
70
85
|
t2 = (i + 1) / num_diffusion_timesteps
|
71
|
-
betas.append(min(1 -
|
86
|
+
betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
|
72
87
|
return torch.tensor(betas, dtype=torch.float32)
|
73
88
|
|
74
89
|
|
@@ -276,7 +276,11 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
|
|
276
276
|
# Make sure sigmas and timesteps have the same device and dtype as original_samples
|
277
277
|
timesteps = timesteps.to(original_samples.device)
|
278
278
|
sigmas = self.discrete_sigmas.to(original_samples.device)[timesteps]
|
279
|
-
noise =
|
279
|
+
noise = (
|
280
|
+
noise * sigmas[:, None, None, None]
|
281
|
+
if noise is not None
|
282
|
+
else torch.randn_like(original_samples) * sigmas[:, None, None, None]
|
283
|
+
)
|
280
284
|
noisy_samples = noise + original_samples
|
281
285
|
return noisy_samples
|
282
286
|
|
@@ -44,7 +44,11 @@ class UnCLIPSchedulerOutput(BaseOutput):
|
|
44
44
|
|
45
45
|
|
46
46
|
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
|
47
|
-
def betas_for_alpha_bar(
|
47
|
+
def betas_for_alpha_bar(
|
48
|
+
num_diffusion_timesteps,
|
49
|
+
max_beta=0.999,
|
50
|
+
alpha_transform_type="cosine",
|
51
|
+
):
|
48
52
|
"""
|
49
53
|
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
50
54
|
(1-beta) over time from t = [0,1].
|
@@ -57,19 +61,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
|
|
57
61
|
num_diffusion_timesteps (`int`): the number of betas to produce.
|
58
62
|
max_beta (`float`): the maximum beta to use; use values lower than 1 to
|
59
63
|
prevent singularities.
|
64
|
+
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
|
65
|
+
Choose from `cosine` or `exp`
|
60
66
|
|
61
67
|
Returns:
|
62
68
|
betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
|
63
69
|
"""
|
70
|
+
if alpha_transform_type == "cosine":
|
64
71
|
|
65
|
-
|
66
|
-
|
72
|
+
def alpha_bar_fn(t):
|
73
|
+
return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
|
74
|
+
|
75
|
+
elif alpha_transform_type == "exp":
|
76
|
+
|
77
|
+
def alpha_bar_fn(t):
|
78
|
+
return math.exp(t * -12.0)
|
79
|
+
|
80
|
+
else:
|
81
|
+
raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
|
67
82
|
|
68
83
|
betas = []
|
69
84
|
for i in range(num_diffusion_timesteps):
|
70
85
|
t1 = i / num_diffusion_timesteps
|
71
86
|
t2 = (i + 1) / num_diffusion_timesteps
|
72
|
-
betas.append(min(1 -
|
87
|
+
betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
|
73
88
|
return torch.tensor(betas, dtype=torch.float32)
|
74
89
|
|
75
90
|
|
@@ -307,3 +322,27 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
|
|
307
322
|
return (pred_prev_sample,)
|
308
323
|
|
309
324
|
return UnCLIPSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample)
|
325
|
+
|
326
|
+
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
|
327
|
+
def add_noise(
|
328
|
+
self,
|
329
|
+
original_samples: torch.FloatTensor,
|
330
|
+
noise: torch.FloatTensor,
|
331
|
+
timesteps: torch.IntTensor,
|
332
|
+
) -> torch.FloatTensor:
|
333
|
+
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
334
|
+
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
335
|
+
timesteps = timesteps.to(original_samples.device)
|
336
|
+
|
337
|
+
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
338
|
+
sqrt_alpha_prod = sqrt_alpha_prod.flatten()
|
339
|
+
while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
|
340
|
+
sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
|
341
|
+
|
342
|
+
sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
|
343
|
+
sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
|
344
|
+
while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
|
345
|
+
sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
|
346
|
+
|
347
|
+
noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
|
348
|
+
return noisy_samples
|
@@ -117,6 +117,17 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
117
117
|
by disabling the corrector at the first few steps (e.g., disable_corrector=[0])
|
118
118
|
solver_p (`SchedulerMixin`, default `None`):
|
119
119
|
can be any other scheduler. If specified, the algorithm will become solver_p + UniC.
|
120
|
+
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
121
|
+
This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
|
122
|
+
noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
|
123
|
+
of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
|
124
|
+
timestep_spacing (`str`, default `"linspace"`):
|
125
|
+
The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
|
126
|
+
Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
|
127
|
+
steps_offset (`int`, default `0`):
|
128
|
+
an offset added to the inference steps. You can use a combination of `offset=1` and
|
129
|
+
`set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
|
130
|
+
stable diffusion.
|
120
131
|
"""
|
121
132
|
|
122
133
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -140,6 +151,9 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
140
151
|
lower_order_final: bool = True,
|
141
152
|
disable_corrector: List[int] = [],
|
142
153
|
solver_p: SchedulerMixin = None,
|
154
|
+
use_karras_sigmas: Optional[bool] = False,
|
155
|
+
timestep_spacing: str = "linspace",
|
156
|
+
steps_offset: int = 0,
|
143
157
|
):
|
144
158
|
if trained_betas is not None:
|
145
159
|
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
@@ -168,7 +182,7 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
168
182
|
|
169
183
|
if solver_type not in ["bh1", "bh2"]:
|
170
184
|
if solver_type in ["midpoint", "heun", "logrho"]:
|
171
|
-
self.register_to_config(solver_type="
|
185
|
+
self.register_to_config(solver_type="bh2")
|
172
186
|
else:
|
173
187
|
raise NotImplementedError(f"{solver_type} does is not implemented for {self.__class__}")
|
174
188
|
|
@@ -194,12 +208,39 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
194
208
|
device (`str` or `torch.device`, optional):
|
195
209
|
the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
196
210
|
"""
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
211
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
|
212
|
+
if self.config.timestep_spacing == "linspace":
|
213
|
+
timesteps = (
|
214
|
+
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
|
215
|
+
.round()[::-1][:-1]
|
216
|
+
.copy()
|
217
|
+
.astype(np.int64)
|
218
|
+
)
|
219
|
+
elif self.config.timestep_spacing == "leading":
|
220
|
+
step_ratio = self.config.num_train_timesteps // (num_inference_steps + 1)
|
221
|
+
# creates integer timesteps by multiplying by ratio
|
222
|
+
# casting to int to avoid issues when num_inference_steps is a power of 3
|
223
|
+
timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64)
|
224
|
+
timesteps += self.config.steps_offset
|
225
|
+
elif self.config.timestep_spacing == "trailing":
|
226
|
+
step_ratio = self.config.num_train_timesteps / num_inference_steps
|
227
|
+
# creates integer timesteps by multiplying by ratio
|
228
|
+
# casting to int to avoid issues when num_inference_steps is a power of 3
|
229
|
+
timesteps = np.arange(self.config.num_train_timesteps, 0, -step_ratio).round().copy().astype(np.int64)
|
230
|
+
timesteps -= 1
|
231
|
+
else:
|
232
|
+
raise ValueError(
|
233
|
+
f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
|
234
|
+
)
|
235
|
+
|
236
|
+
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
237
|
+
if self.config.use_karras_sigmas:
|
238
|
+
log_sigmas = np.log(sigmas)
|
239
|
+
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
240
|
+
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
|
241
|
+
timesteps = np.flip(timesteps).copy().astype(np.int64)
|
242
|
+
|
243
|
+
self.sigmas = torch.from_numpy(sigmas)
|
203
244
|
|
204
245
|
# when num_inference_steps == num_train_timesteps, we can end up with
|
205
246
|
# duplicates in timesteps.
|
diffusers/training_utils.py
CHANGED
diffusers/utils/__init__.py
CHANGED
@@ -58,6 +58,7 @@ from .import_utils import (
|
|
58
58
|
is_flax_available,
|
59
59
|
is_ftfy_available,
|
60
60
|
is_inflect_available,
|
61
|
+
is_invisible_watermark_available,
|
61
62
|
is_k_diffusion_available,
|
62
63
|
is_k_diffusion_version,
|
63
64
|
is_librosa_available,
|
@@ -103,7 +104,7 @@ if is_torch_available():
|
|
103
104
|
)
|
104
105
|
from .torch_utils import maybe_allow_in_graph
|
105
106
|
|
106
|
-
from .testing_utils import export_to_video
|
107
|
+
from .testing_utils import export_to_gif, export_to_video
|
107
108
|
|
108
109
|
|
109
110
|
logger = get_logger(__name__)
|
@@ -210,6 +210,21 @@ class AudioPipelineOutput(metaclass=DummyObject):
|
|
210
210
|
requires_backends(cls, ["torch"])
|
211
211
|
|
212
212
|
|
213
|
+
class ConsistencyModelPipeline(metaclass=DummyObject):
|
214
|
+
_backends = ["torch"]
|
215
|
+
|
216
|
+
def __init__(self, *args, **kwargs):
|
217
|
+
requires_backends(self, ["torch"])
|
218
|
+
|
219
|
+
@classmethod
|
220
|
+
def from_config(cls, *args, **kwargs):
|
221
|
+
requires_backends(cls, ["torch"])
|
222
|
+
|
223
|
+
@classmethod
|
224
|
+
def from_pretrained(cls, *args, **kwargs):
|
225
|
+
requires_backends(cls, ["torch"])
|
226
|
+
|
227
|
+
|
213
228
|
class DanceDiffusionPipeline(metaclass=DummyObject):
|
214
229
|
_backends = ["torch"]
|
215
230
|
|
@@ -390,6 +405,21 @@ class ScoreSdeVePipeline(metaclass=DummyObject):
|
|
390
405
|
requires_backends(cls, ["torch"])
|
391
406
|
|
392
407
|
|
408
|
+
class CMStochasticIterativeScheduler(metaclass=DummyObject):
|
409
|
+
_backends = ["torch"]
|
410
|
+
|
411
|
+
def __init__(self, *args, **kwargs):
|
412
|
+
requires_backends(self, ["torch"])
|
413
|
+
|
414
|
+
@classmethod
|
415
|
+
def from_config(cls, *args, **kwargs):
|
416
|
+
requires_backends(cls, ["torch"])
|
417
|
+
|
418
|
+
@classmethod
|
419
|
+
def from_pretrained(cls, *args, **kwargs):
|
420
|
+
requires_backends(cls, ["torch"])
|
421
|
+
|
422
|
+
|
393
423
|
class DDIMInverseScheduler(metaclass=DummyObject):
|
394
424
|
_backends = ["torch"]
|
395
425
|
|
@@ -405,6 +435,21 @@ class DDIMInverseScheduler(metaclass=DummyObject):
|
|
405
435
|
requires_backends(cls, ["torch"])
|
406
436
|
|
407
437
|
|
438
|
+
class DDIMParallelScheduler(metaclass=DummyObject):
|
439
|
+
_backends = ["torch"]
|
440
|
+
|
441
|
+
def __init__(self, *args, **kwargs):
|
442
|
+
requires_backends(self, ["torch"])
|
443
|
+
|
444
|
+
@classmethod
|
445
|
+
def from_config(cls, *args, **kwargs):
|
446
|
+
requires_backends(cls, ["torch"])
|
447
|
+
|
448
|
+
@classmethod
|
449
|
+
def from_pretrained(cls, *args, **kwargs):
|
450
|
+
requires_backends(cls, ["torch"])
|
451
|
+
|
452
|
+
|
408
453
|
class DDIMScheduler(metaclass=DummyObject):
|
409
454
|
_backends = ["torch"]
|
410
455
|
|
@@ -420,6 +465,21 @@ class DDIMScheduler(metaclass=DummyObject):
|
|
420
465
|
requires_backends(cls, ["torch"])
|
421
466
|
|
422
467
|
|
468
|
+
class DDPMParallelScheduler(metaclass=DummyObject):
|
469
|
+
_backends = ["torch"]
|
470
|
+
|
471
|
+
def __init__(self, *args, **kwargs):
|
472
|
+
requires_backends(self, ["torch"])
|
473
|
+
|
474
|
+
@classmethod
|
475
|
+
def from_config(cls, *args, **kwargs):
|
476
|
+
requires_backends(cls, ["torch"])
|
477
|
+
|
478
|
+
@classmethod
|
479
|
+
def from_pretrained(cls, *args, **kwargs):
|
480
|
+
requires_backends(cls, ["torch"])
|
481
|
+
|
482
|
+
|
423
483
|
class DDPMScheduler(metaclass=DummyObject):
|
424
484
|
_backends = ["torch"]
|
425
485
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# This file is autogenerated by the command `make fix-copies`, do not edit.
|
2
|
+
from ..utils import DummyObject, requires_backends
|
3
|
+
|
4
|
+
|
5
|
+
class StableDiffusionXLImg2ImgPipeline(metaclass=DummyObject):
|
6
|
+
_backends = ["torch", "transformers", "invisible_watermark"]
|
7
|
+
|
8
|
+
def __init__(self, *args, **kwargs):
|
9
|
+
requires_backends(self, ["torch", "transformers", "invisible_watermark"])
|
10
|
+
|
11
|
+
@classmethod
|
12
|
+
def from_config(cls, *args, **kwargs):
|
13
|
+
requires_backends(cls, ["torch", "transformers", "invisible_watermark"])
|
14
|
+
|
15
|
+
@classmethod
|
16
|
+
def from_pretrained(cls, *args, **kwargs):
|
17
|
+
requires_backends(cls, ["torch", "transformers", "invisible_watermark"])
|
18
|
+
|
19
|
+
|
20
|
+
class StableDiffusionXLPipeline(metaclass=DummyObject):
|
21
|
+
_backends = ["torch", "transformers", "invisible_watermark"]
|
22
|
+
|
23
|
+
def __init__(self, *args, **kwargs):
|
24
|
+
requires_backends(self, ["torch", "transformers", "invisible_watermark"])
|
25
|
+
|
26
|
+
@classmethod
|
27
|
+
def from_config(cls, *args, **kwargs):
|
28
|
+
requires_backends(cls, ["torch", "transformers", "invisible_watermark"])
|
29
|
+
|
30
|
+
@classmethod
|
31
|
+
def from_pretrained(cls, *args, **kwargs):
|
32
|
+
requires_backends(cls, ["torch", "transformers", "invisible_watermark"])
|