diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- diffusers/__init__.py +3 -1
- diffusers/commands/fp16_safetensors.py +2 -7
- diffusers/configuration_utils.py +23 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/loaders.py +62 -64
- diffusers/models/__init__.py +1 -0
- diffusers/models/activations.py +2 -0
- diffusers/models/attention.py +45 -1
- diffusers/models/autoencoder_tiny.py +193 -0
- diffusers/models/controlnet.py +1 -1
- diffusers/models/embeddings.py +56 -0
- diffusers/models/lora.py +0 -6
- diffusers/models/modeling_flax_utils.py +28 -2
- diffusers/models/modeling_utils.py +33 -16
- diffusers/models/transformer_2d.py +26 -9
- diffusers/models/unet_1d.py +2 -2
- diffusers/models/unet_2d_blocks.py +106 -56
- diffusers/models/unet_2d_condition.py +20 -5
- diffusers/models/vae.py +106 -1
- diffusers/pipelines/__init__.py +1 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/auto_pipeline.py +33 -43
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/pipeline_flax_utils.py +41 -4
- diffusers/pipelines/pipeline_utils.py +60 -16
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
- diffusers/schedulers/scheduling_consistency_models.py +70 -57
- diffusers/schedulers/scheduling_ddim.py +76 -71
- diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
- diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
- diffusers/schedulers/scheduling_ddpm.py +68 -67
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
- diffusers/schedulers/scheduling_deis_multistep.py +93 -85
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
- diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
- diffusers/schedulers/scheduling_euler_discrete.py +63 -56
- diffusers/schedulers/scheduling_heun_discrete.py +57 -45
- diffusers/schedulers/scheduling_ipndm.py +27 -22
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
- diffusers/schedulers/scheduling_karras_ve.py +55 -45
- diffusers/schedulers/scheduling_lms_discrete.py +58 -52
- diffusers/schedulers/scheduling_pndm.py +77 -62
- diffusers/schedulers/scheduling_repaint.py +56 -38
- diffusers/schedulers/scheduling_sde_ve.py +62 -50
- diffusers/schedulers/scheduling_sde_vp.py +32 -11
- diffusers/schedulers/scheduling_unclip.py +3 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
- diffusers/schedulers/scheduling_utils.py +41 -35
- diffusers/schedulers/scheduling_utils_flax.py +8 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- diffusers/utils/hub_utils.py +105 -2
- diffusers/utils/import_utils.py +0 -4
- diffusers/utils/pil_utils.py +19 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
- diffusers/models/cross_attention.py +0 -94
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -71,42 +71,42 @@ def betas_for_alpha_bar(
|
|
71
71
|
|
72
72
|
class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
73
73
|
"""
|
74
|
-
|
75
|
-
|
74
|
+
`PNDMScheduler` uses pseudo numerical methods for diffusion models such as the Runge-Kutta and linear multi-step
|
75
|
+
method.
|
76
76
|
|
77
|
-
|
78
|
-
|
79
|
-
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
80
|
-
[`~SchedulerMixin.from_pretrained`] functions.
|
81
|
-
|
82
|
-
For more details, see the original paper: https://arxiv.org/abs/2202.09778
|
77
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
78
|
+
methods the library implements for all schedulers such as loading and saving.
|
83
79
|
|
84
80
|
Args:
|
85
|
-
num_train_timesteps (`int
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
81
|
+
num_train_timesteps (`int`, defaults to 1000):
|
82
|
+
The number of diffusion steps to train the model.
|
83
|
+
beta_start (`float`, defaults to 0.0001):
|
84
|
+
The starting `beta` value of inference.
|
85
|
+
beta_end (`float`, defaults to 0.02):
|
86
|
+
The final `beta` value.
|
87
|
+
beta_schedule (`str`, defaults to `"linear"`):
|
88
|
+
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
|
90
89
|
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
|
91
|
-
trained_betas (`np.ndarray`, optional):
|
92
|
-
|
93
|
-
skip_prk_steps (`bool`):
|
94
|
-
|
95
|
-
|
96
|
-
set_alpha_to_one (`bool`,
|
97
|
-
|
98
|
-
|
99
|
-
otherwise it uses the value
|
100
|
-
prediction_type (`str`,
|
101
|
-
|
102
|
-
or `v_prediction` (see section 2.4 https://imagen.research.google/video/paper.pdf)
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
90
|
+
trained_betas (`np.ndarray`, *optional*):
|
91
|
+
Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
|
92
|
+
skip_prk_steps (`bool`, defaults to `False`):
|
93
|
+
Allows the scheduler to skip the Runge-Kutta steps defined in the original paper as being required before
|
94
|
+
PLMS steps.
|
95
|
+
set_alpha_to_one (`bool`, defaults to `False`):
|
96
|
+
Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
|
97
|
+
there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
|
98
|
+
otherwise it uses the alpha value at step 0.
|
99
|
+
prediction_type (`str`, defaults to `epsilon`, *optional*):
|
100
|
+
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process)
|
101
|
+
or `v_prediction` (see section 2.4 of [Imagen Video](https://imagen.research.google/video/paper.pdf)
|
102
|
+
paper).
|
103
|
+
timestep_spacing (`str`, defaults to `"leading"`):
|
104
|
+
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
105
|
+
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
106
|
+
steps_offset (`int`, defaults to 0):
|
107
|
+
An offset added to the inference steps. You can use a combination of `steps_offset=1` and
|
108
|
+
`set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
|
109
|
+
Diffusion.
|
110
110
|
"""
|
111
111
|
|
112
112
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -169,11 +169,13 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
169
169
|
|
170
170
|
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
171
171
|
"""
|
172
|
-
Sets the discrete timesteps used for the diffusion chain
|
172
|
+
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
173
173
|
|
174
174
|
Args:
|
175
175
|
num_inference_steps (`int`):
|
176
|
-
|
176
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
177
|
+
device (`str` or `torch.device`, *optional*):
|
178
|
+
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
177
179
|
"""
|
178
180
|
|
179
181
|
self.num_inference_steps = num_inference_steps
|
@@ -233,22 +235,24 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
233
235
|
return_dict: bool = True,
|
234
236
|
) -> Union[SchedulerOutput, Tuple]:
|
235
237
|
"""
|
236
|
-
Predict the sample
|
237
|
-
process from the learned model outputs (most often the predicted noise).
|
238
|
-
|
239
|
-
This function calls `step_prk()` or `step_plms()` depending on the internal variable `counter`.
|
238
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
|
239
|
+
process from the learned model outputs (most often the predicted noise), and calls [`~PNDMScheduler.step_prk`]
|
240
|
+
or [`~PNDMScheduler.step_plms`] depending on the internal variable `counter`.
|
240
241
|
|
241
242
|
Args:
|
242
|
-
model_output (`torch.FloatTensor`):
|
243
|
-
|
243
|
+
model_output (`torch.FloatTensor`):
|
244
|
+
The direct output from the learned diffusion model.
|
245
|
+
timestep (`int`):
|
246
|
+
The current discrete timestep in the diffusion chain.
|
244
247
|
sample (`torch.FloatTensor`):
|
245
|
-
current instance of sample
|
246
|
-
return_dict (`bool`):
|
248
|
+
A current instance of a sample created by the diffusion process.
|
249
|
+
return_dict (`bool`):
|
250
|
+
Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.
|
247
251
|
|
248
252
|
Returns:
|
249
253
|
[`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
|
250
|
-
|
251
|
-
|
254
|
+
If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
|
255
|
+
tuple is returned where the first element is the sample tensor.
|
252
256
|
|
253
257
|
"""
|
254
258
|
if self.counter < len(self.prk_timesteps) and not self.config.skip_prk_steps:
|
@@ -264,19 +268,24 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
264
268
|
return_dict: bool = True,
|
265
269
|
) -> Union[SchedulerOutput, Tuple]:
|
266
270
|
"""
|
267
|
-
|
268
|
-
solution to the differential
|
271
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
|
272
|
+
the Runge-Kutta method. It performs four forward passes to approximate the solution to the differential
|
273
|
+
equation.
|
269
274
|
|
270
275
|
Args:
|
271
|
-
model_output (`torch.FloatTensor`):
|
272
|
-
|
276
|
+
model_output (`torch.FloatTensor`):
|
277
|
+
The direct output from the learned diffusion model.
|
278
|
+
timestep (`int`):
|
279
|
+
The current discrete timestep in the diffusion chain.
|
273
280
|
sample (`torch.FloatTensor`):
|
274
|
-
current instance of sample
|
275
|
-
return_dict (`bool`):
|
281
|
+
A current instance of a sample created by the diffusion process.
|
282
|
+
return_dict (`bool`):
|
283
|
+
Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.
|
276
284
|
|
277
285
|
Returns:
|
278
|
-
[`~scheduling_utils.SchedulerOutput`] or `tuple`:
|
279
|
-
|
286
|
+
[`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
|
287
|
+
If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
|
288
|
+
tuple is returned where the first element is the sample tensor.
|
280
289
|
|
281
290
|
"""
|
282
291
|
if self.num_inference_steps is None:
|
@@ -319,19 +328,23 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
319
328
|
return_dict: bool = True,
|
320
329
|
) -> Union[SchedulerOutput, Tuple]:
|
321
330
|
"""
|
322
|
-
|
323
|
-
times to approximate the solution.
|
331
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
|
332
|
+
the linear multistep method. It performs one forward pass multiple times to approximate the solution.
|
324
333
|
|
325
334
|
Args:
|
326
|
-
model_output (`torch.FloatTensor`):
|
327
|
-
|
335
|
+
model_output (`torch.FloatTensor`):
|
336
|
+
The direct output from the learned diffusion model.
|
337
|
+
timestep (`int`):
|
338
|
+
The current discrete timestep in the diffusion chain.
|
328
339
|
sample (`torch.FloatTensor`):
|
329
|
-
current instance of sample
|
330
|
-
return_dict (`bool`):
|
340
|
+
A current instance of a sample created by the diffusion process.
|
341
|
+
return_dict (`bool`):
|
342
|
+
Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.
|
331
343
|
|
332
344
|
Returns:
|
333
|
-
[`~scheduling_utils.SchedulerOutput`] or `tuple`:
|
334
|
-
|
345
|
+
[`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
|
346
|
+
If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
|
347
|
+
tuple is returned where the first element is the sample tensor.
|
335
348
|
|
336
349
|
"""
|
337
350
|
if self.num_inference_steps is None:
|
@@ -384,10 +397,12 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
384
397
|
current timestep.
|
385
398
|
|
386
399
|
Args:
|
387
|
-
sample (`torch.FloatTensor`):
|
400
|
+
sample (`torch.FloatTensor`):
|
401
|
+
The input sample.
|
388
402
|
|
389
403
|
Returns:
|
390
|
-
`torch.FloatTensor`:
|
404
|
+
`torch.FloatTensor`:
|
405
|
+
A scaled input sample.
|
391
406
|
"""
|
392
407
|
return sample
|
393
408
|
|
@@ -89,32 +89,28 @@ def betas_for_alpha_bar(
|
|
89
89
|
|
90
90
|
class RePaintScheduler(SchedulerMixin, ConfigMixin):
|
91
91
|
"""
|
92
|
-
|
92
|
+
`RePaintScheduler` is a scheduler for DDPM inpainting inside a given mask.
|
93
93
|
|
94
|
-
|
95
|
-
|
96
|
-
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
97
|
-
[`~SchedulerMixin.from_pretrained`] functions.
|
98
|
-
|
99
|
-
For more details, see the original paper: https://arxiv.org/pdf/2201.09865.pdf
|
94
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
95
|
+
methods the library implements for all schedulers such as loading and saving.
|
100
96
|
|
101
97
|
Args:
|
102
|
-
num_train_timesteps (`int
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
98
|
+
num_train_timesteps (`int`, defaults to 1000):
|
99
|
+
The number of diffusion steps to train the model.
|
100
|
+
beta_start (`float`, defaults to 0.0001):
|
101
|
+
The starting `beta` value of inference.
|
102
|
+
beta_end (`float`, defaults to 0.02):
|
103
|
+
The final `beta` value.
|
104
|
+
beta_schedule (`str`, defaults to `"linear"`):
|
105
|
+
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
|
106
|
+
`linear`, `scaled_linear`, `squaredcos_cap_v2`, or `sigmoid`.
|
108
107
|
eta (`float`):
|
109
|
-
The weight of noise for added noise in
|
110
|
-
1.0
|
111
|
-
trained_betas (`np.ndarray`, optional):
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
`fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
|
116
|
-
clip_sample (`bool`, default `True`):
|
117
|
-
option to clip predicted sample between -1 and 1 for numerical stability.
|
108
|
+
The weight of the noise added in a diffusion step. Its value is between 0.0 and 1.0: 0.0 corresponds to the
|
109
|
+
DDIM scheduler and 1.0 corresponds to the DDPM scheduler.
|
110
|
+
trained_betas (`np.ndarray`, *optional*):
|
111
|
+
Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
|
112
|
+
clip_sample (`bool`, defaults to `True`):
|
113
|
+
Clip the predicted sample between -1 and 1 for numerical stability.
|
118
114
|
|
119
115
|
"""
|
120
116
|
|
@@ -171,11 +167,14 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):
|
|
171
167
|
current timestep.
|
172
168
|
|
173
169
|
Args:
|
174
|
-
sample (`torch.FloatTensor`):
|
175
|
-
|
170
|
+
sample (`torch.FloatTensor`):
|
171
|
+
The input sample.
|
172
|
+
timestep (`int`, *optional*):
|
173
|
+
The current timestep in the diffusion chain.
|
176
174
|
|
177
175
|
Returns:
|
178
|
-
`torch.FloatTensor`:
|
176
|
+
`torch.FloatTensor`:
|
177
|
+
A scaled input sample.
|
179
178
|
"""
|
180
179
|
return sample
|
181
180
|
|
@@ -186,6 +185,23 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):
|
|
186
185
|
jump_n_sample: int = 10,
|
187
186
|
device: Union[str, torch.device] = None,
|
188
187
|
):
|
188
|
+
"""
|
189
|
+
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
190
|
+
|
191
|
+
Args:
|
192
|
+
num_inference_steps (`int`):
|
193
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
194
|
+
`timesteps` must be `None`.
|
195
|
+
jump_length (`int`, defaults to 10):
|
196
|
+
The number of steps taken forward in time before going backward in time for a single jump (“j” in
|
197
|
+
RePaint paper). Take a look at Figure 9 and 10 in the paper.
|
198
|
+
jump_n_sample (`int`, defaults to 10):
|
199
|
+
The number of times to make a forward time jump for a given chosen time sample. Take a look at Figure 9
|
200
|
+
and 10 in the paper.
|
201
|
+
device (`str` or `torch.device`, *optional*):
|
202
|
+
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
203
|
+
|
204
|
+
"""
|
189
205
|
num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps)
|
190
206
|
self.num_inference_steps = num_inference_steps
|
191
207
|
|
@@ -239,27 +255,29 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):
|
|
239
255
|
return_dict: bool = True,
|
240
256
|
) -> Union[RePaintSchedulerOutput, Tuple]:
|
241
257
|
"""
|
242
|
-
Predict the sample
|
258
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
|
243
259
|
process from the learned model outputs (most often the predicted noise).
|
244
260
|
|
245
261
|
Args:
|
246
|
-
model_output (`torch.FloatTensor`):
|
247
|
-
diffusion model.
|
248
|
-
timestep (`int`):
|
262
|
+
model_output (`torch.FloatTensor`):
|
263
|
+
The direct output from the learned diffusion model.
|
264
|
+
timestep (`int`):
|
265
|
+
The current discrete timestep in the diffusion chain.
|
249
266
|
sample (`torch.FloatTensor`):
|
250
|
-
current instance of sample
|
267
|
+
A current instance of a sample created by the diffusion process.
|
251
268
|
original_image (`torch.FloatTensor`):
|
252
|
-
|
269
|
+
The original image to inpaint on.
|
253
270
|
mask (`torch.FloatTensor`):
|
254
|
-
|
255
|
-
generator (`torch.Generator`, *optional*):
|
256
|
-
|
257
|
-
|
271
|
+
The mask where a value of 0.0 indicates which part of the original image to inpaint.
|
272
|
+
generator (`torch.Generator`, *optional*):
|
273
|
+
A random number generator.
|
274
|
+
return_dict (`bool`, *optional*, defaults to `True`):
|
275
|
+
Whether or not to return a [`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] or `tuple`.
|
258
276
|
|
259
277
|
Returns:
|
260
|
-
[`~schedulers.
|
261
|
-
|
262
|
-
|
278
|
+
[`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] or `tuple`:
|
279
|
+
If return_dict is `True`, [`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] is returned,
|
280
|
+
otherwise a tuple is returned where the first element is the sample tensor.
|
263
281
|
|
264
282
|
"""
|
265
283
|
t = timestep
|
@@ -28,14 +28,14 @@ from .scheduling_utils import SchedulerMixin, SchedulerOutput
|
|
28
28
|
@dataclass
|
29
29
|
class SdeVeOutput(BaseOutput):
|
30
30
|
"""
|
31
|
-
Output class for the
|
31
|
+
Output class for the scheduler's `step` function output.
|
32
32
|
|
33
33
|
Args:
|
34
34
|
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
35
|
-
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
|
35
|
+
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
|
36
36
|
denoising loop.
|
37
37
|
prev_sample_mean (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
38
|
-
Mean averaged `prev_sample
|
38
|
+
Mean averaged `prev_sample` over previous timesteps.
|
39
39
|
"""
|
40
40
|
|
41
41
|
prev_sample: torch.FloatTensor
|
@@ -44,26 +44,25 @@ class SdeVeOutput(BaseOutput):
|
|
44
44
|
|
45
45
|
class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
|
46
46
|
"""
|
47
|
-
|
47
|
+
`ScoreSdeVeScheduler` is a variance exploding stochastic differential equation (SDE) scheduler.
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
[`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
|
52
|
-
function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
|
53
|
-
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
54
|
-
[`~SchedulerMixin.from_pretrained`] functions.
|
49
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
50
|
+
methods the library implements for all schedulers such as loading and saving.
|
55
51
|
|
56
52
|
Args:
|
57
|
-
num_train_timesteps (`int
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
53
|
+
num_train_timesteps (`int`, defaults to 1000):
|
54
|
+
The number of diffusion steps to train the model.
|
55
|
+
snr (`float`, defaults to 0.15):
|
56
|
+
A coefficient weighting the step from the `model_output` sample (from the network) to the random noise.
|
57
|
+
sigma_min (`float`, defaults to 0.01):
|
58
|
+
The initial noise scale for the sigma sequence in the sampling procedure. The minimum sigma should mirror
|
59
|
+
the distribution of the data.
|
60
|
+
sigma_max (`float`, defaults to 1348.0):
|
61
|
+
The maximum value used for the range of continuous timesteps passed into the model.
|
62
|
+
sampling_eps (`float`, defaults to 1e-5):
|
63
|
+
The end value of sampling where timesteps decrease progressively from 1 to epsilon.
|
64
|
+
correct_steps (`int`, defaults to 1):
|
65
|
+
The number of correction steps performed on a produced sample.
|
67
66
|
"""
|
68
67
|
|
69
68
|
order = 1
|
@@ -92,11 +91,14 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
|
|
92
91
|
current timestep.
|
93
92
|
|
94
93
|
Args:
|
95
|
-
sample (`torch.FloatTensor`):
|
96
|
-
|
94
|
+
sample (`torch.FloatTensor`):
|
95
|
+
The input sample.
|
96
|
+
timestep (`int`, *optional*):
|
97
|
+
The current timestep in the diffusion chain.
|
97
98
|
|
98
99
|
Returns:
|
99
|
-
`torch.FloatTensor`:
|
100
|
+
`torch.FloatTensor`:
|
101
|
+
A scaled input sample.
|
100
102
|
"""
|
101
103
|
return sample
|
102
104
|
|
@@ -104,13 +106,15 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
|
|
104
106
|
self, num_inference_steps: int, sampling_eps: float = None, device: Union[str, torch.device] = None
|
105
107
|
):
|
106
108
|
"""
|
107
|
-
Sets the continuous timesteps used for the diffusion chain
|
109
|
+
Sets the continuous timesteps used for the diffusion chain (to be run before inference).
|
108
110
|
|
109
111
|
Args:
|
110
112
|
num_inference_steps (`int`):
|
111
|
-
|
112
|
-
sampling_eps (`float`, optional):
|
113
|
-
final timestep value (overrides value given
|
113
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
114
|
+
sampling_eps (`float`, *optional*):
|
115
|
+
The final timestep value (overrides value given during scheduler instantiation).
|
116
|
+
device (`str` or `torch.device`, *optional*):
|
117
|
+
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
114
118
|
|
115
119
|
"""
|
116
120
|
sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps
|
@@ -121,19 +125,18 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
|
|
121
125
|
self, num_inference_steps: int, sigma_min: float = None, sigma_max: float = None, sampling_eps: float = None
|
122
126
|
):
|
123
127
|
"""
|
124
|
-
Sets the noise scales used for the diffusion chain
|
125
|
-
|
126
|
-
The sigmas control the weight of the `drift` and `diffusion` components of sample update.
|
128
|
+
Sets the noise scales used for the diffusion chain (to be run before inference). The sigmas control the weight
|
129
|
+
of the `drift` and `diffusion` components of the sample update.
|
127
130
|
|
128
131
|
Args:
|
129
132
|
num_inference_steps (`int`):
|
130
|
-
|
133
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
131
134
|
sigma_min (`float`, optional):
|
132
|
-
initial noise scale value (overrides value given
|
135
|
+
The initial noise scale value (overrides value given during scheduler instantiation).
|
133
136
|
sigma_max (`float`, optional):
|
134
|
-
final noise scale value (overrides value given
|
137
|
+
The final noise scale value (overrides value given during scheduler instantiation).
|
135
138
|
sampling_eps (`float`, optional):
|
136
|
-
final timestep value (overrides value given
|
139
|
+
The final timestep value (overrides value given during scheduler instantiation).
|
137
140
|
|
138
141
|
"""
|
139
142
|
sigma_min = sigma_min if sigma_min is not None else self.config.sigma_min
|
@@ -162,20 +165,25 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
|
|
162
165
|
return_dict: bool = True,
|
163
166
|
) -> Union[SdeVeOutput, Tuple]:
|
164
167
|
"""
|
165
|
-
Predict the sample
|
168
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
|
166
169
|
process from the learned model outputs (most often the predicted noise).
|
167
170
|
|
168
171
|
Args:
|
169
|
-
model_output (`torch.FloatTensor`):
|
170
|
-
|
172
|
+
model_output (`torch.FloatTensor`):
|
173
|
+
The direct output from the learned diffusion model.
|
174
|
+
timestep (`int`):
|
175
|
+
The current discrete timestep in the diffusion chain.
|
171
176
|
sample (`torch.FloatTensor`):
|
172
|
-
current instance of sample
|
173
|
-
generator
|
174
|
-
|
177
|
+
A current instance of a sample created by the diffusion process.
|
178
|
+
generator (`torch.Generator`, *optional*):
|
179
|
+
A random number generator.
|
180
|
+
return_dict (`bool`, *optional*, defaults to `True`):
|
181
|
+
Whether or not to return a [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`.
|
175
182
|
|
176
183
|
Returns:
|
177
|
-
[`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`:
|
178
|
-
|
184
|
+
[`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`:
|
185
|
+
If return_dict is `True`, [`~schedulers.scheduling_sde_ve.SdeVeOutput`] is returned, otherwise a tuple
|
186
|
+
is returned where the first element is the sample tensor.
|
179
187
|
|
180
188
|
"""
|
181
189
|
if self.timesteps is None:
|
@@ -224,19 +232,23 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
|
|
224
232
|
return_dict: bool = True,
|
225
233
|
) -> Union[SchedulerOutput, Tuple]:
|
226
234
|
"""
|
227
|
-
Correct the predicted sample based on the
|
228
|
-
|
235
|
+
Correct the predicted sample based on the `model_output` of the network. This is often run repeatedly after
|
236
|
+
making the prediction for the previous timestep.
|
229
237
|
|
230
238
|
Args:
|
231
|
-
model_output (`torch.FloatTensor`):
|
239
|
+
model_output (`torch.FloatTensor`):
|
240
|
+
The direct output from the learned diffusion model.
|
232
241
|
sample (`torch.FloatTensor`):
|
233
|
-
current instance of sample
|
234
|
-
generator
|
235
|
-
|
242
|
+
A current instance of a sample created by the diffusion process.
|
243
|
+
generator (`torch.Generator`, *optional*):
|
244
|
+
A random number generator.
|
245
|
+
return_dict (`bool`, *optional*, defaults to `True`):
|
246
|
+
Whether or not to return a [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`.
|
236
247
|
|
237
248
|
Returns:
|
238
|
-
[`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`:
|
239
|
-
|
249
|
+
[`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`:
|
250
|
+
If return_dict is `True`, [`~schedulers.scheduling_sde_ve.SdeVeOutput`] is returned, otherwise a tuple
|
251
|
+
is returned where the first element is the sample tensor.
|
240
252
|
|
241
253
|
"""
|
242
254
|
if self.timesteps is None:
|
@@ -26,17 +26,18 @@ from .scheduling_utils import SchedulerMixin
|
|
26
26
|
|
27
27
|
class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
|
28
28
|
"""
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
29
|
+
`ScoreSdeVpScheduler` is a variance preserving stochastic differential equation (SDE) scheduler.
|
30
|
+
|
31
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
32
|
+
methods the library implements for all schedulers such as loading and saving.
|
33
|
+
|
34
|
+
Args:
|
35
|
+
num_train_timesteps (`int`, defaults to 2000):
|
36
|
+
The number of diffusion steps to train the model.
|
37
|
+
beta_min (`float`, defaults to 0.1):
|
38
|
+
beta_max (`int`, defaults to 20):
|
39
|
+
sampling_eps (`float`, defaults to 1e-3):
|
40
|
+
The end value of sampling where timesteps decrease progressively from 1 to epsilon.
|
40
41
|
"""
|
41
42
|
|
42
43
|
order = 1
|
@@ -48,9 +49,29 @@ class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
|
|
48
49
|
self.timesteps = None
|
49
50
|
|
50
51
|
def set_timesteps(self, num_inference_steps, device: Union[str, torch.device] = None):
|
52
|
+
"""
|
53
|
+
Sets the continuous timesteps used for the diffusion chain (to be run before inference).
|
54
|
+
|
55
|
+
Args:
|
56
|
+
num_inference_steps (`int`):
|
57
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
58
|
+
device (`str` or `torch.device`, *optional*):
|
59
|
+
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
60
|
+
"""
|
51
61
|
self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps, device=device)
|
52
62
|
|
53
63
|
def step_pred(self, score, x, t, generator=None):
|
64
|
+
"""
|
65
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
|
66
|
+
process from the learned model outputs (most often the predicted noise).
|
67
|
+
|
68
|
+
Args:
|
69
|
+
score ():
|
70
|
+
x ():
|
71
|
+
t ():
|
72
|
+
generator (`torch.Generator`, *optional*):
|
73
|
+
A random number generator.
|
74
|
+
"""
|
54
75
|
if self.timesteps is None:
|
55
76
|
raise ValueError(
|
56
77
|
"`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler"
|
@@ -28,14 +28,14 @@ from .scheduling_utils import SchedulerMixin
|
|
28
28
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->UnCLIP
|
29
29
|
class UnCLIPSchedulerOutput(BaseOutput):
|
30
30
|
"""
|
31
|
-
Output class for the scheduler's step function output.
|
31
|
+
Output class for the scheduler's `step` function output.
|
32
32
|
|
33
33
|
Args:
|
34
34
|
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
35
|
-
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
|
35
|
+
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
|
36
36
|
denoising loop.
|
37
37
|
pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
38
|
-
The predicted denoised sample (x_{0}) based on the model output from the current timestep.
|
38
|
+
The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
|
39
39
|
`pred_original_sample` can be used to preview progress or for guidance.
|
40
40
|
"""
|
41
41
|
|