diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- diffusers/__init__.py +3 -1
- diffusers/commands/fp16_safetensors.py +2 -7
- diffusers/configuration_utils.py +23 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/loaders.py +62 -64
- diffusers/models/__init__.py +1 -0
- diffusers/models/activations.py +2 -0
- diffusers/models/attention.py +45 -1
- diffusers/models/autoencoder_tiny.py +193 -0
- diffusers/models/controlnet.py +1 -1
- diffusers/models/embeddings.py +56 -0
- diffusers/models/lora.py +0 -6
- diffusers/models/modeling_flax_utils.py +28 -2
- diffusers/models/modeling_utils.py +33 -16
- diffusers/models/transformer_2d.py +26 -9
- diffusers/models/unet_1d.py +2 -2
- diffusers/models/unet_2d_blocks.py +106 -56
- diffusers/models/unet_2d_condition.py +20 -5
- diffusers/models/vae.py +106 -1
- diffusers/pipelines/__init__.py +1 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/auto_pipeline.py +33 -43
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/pipeline_flax_utils.py +41 -4
- diffusers/pipelines/pipeline_utils.py +60 -16
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
- diffusers/schedulers/scheduling_consistency_models.py +70 -57
- diffusers/schedulers/scheduling_ddim.py +76 -71
- diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
- diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
- diffusers/schedulers/scheduling_ddpm.py +68 -67
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
- diffusers/schedulers/scheduling_deis_multistep.py +93 -85
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
- diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
- diffusers/schedulers/scheduling_euler_discrete.py +63 -56
- diffusers/schedulers/scheduling_heun_discrete.py +57 -45
- diffusers/schedulers/scheduling_ipndm.py +27 -22
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
- diffusers/schedulers/scheduling_karras_ve.py +55 -45
- diffusers/schedulers/scheduling_lms_discrete.py +58 -52
- diffusers/schedulers/scheduling_pndm.py +77 -62
- diffusers/schedulers/scheduling_repaint.py +56 -38
- diffusers/schedulers/scheduling_sde_ve.py +62 -50
- diffusers/schedulers/scheduling_sde_vp.py +32 -11
- diffusers/schedulers/scheduling_unclip.py +3 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
- diffusers/schedulers/scheduling_utils.py +41 -35
- diffusers/schedulers/scheduling_utils_flax.py +8 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- diffusers/utils/hub_utils.py +105 -2
- diffusers/utils/import_utils.py +0 -4
- diffusers/utils/pil_utils.py +19 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
- diffusers/models/cross_attention.py +0 -94
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -31,14 +31,14 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
|
|
31
31
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput
|
32
32
|
class DDIMParallelSchedulerOutput(BaseOutput):
|
33
33
|
"""
|
34
|
-
Output class for the scheduler's step function output.
|
34
|
+
Output class for the scheduler's `step` function output.
|
35
35
|
|
36
36
|
Args:
|
37
37
|
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
38
|
-
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
|
38
|
+
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
|
39
39
|
denoising loop.
|
40
40
|
pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
41
|
-
The predicted denoised sample (x_{0}) based on the model output from the current timestep.
|
41
|
+
The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
|
42
42
|
`pred_original_sample` can be used to preview progress or for guidance.
|
43
43
|
"""
|
44
44
|
|
@@ -250,11 +250,14 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
250
250
|
current timestep.
|
251
251
|
|
252
252
|
Args:
|
253
|
-
sample (`torch.FloatTensor`):
|
254
|
-
|
253
|
+
sample (`torch.FloatTensor`):
|
254
|
+
The input sample.
|
255
|
+
timestep (`int`, *optional*):
|
256
|
+
The current timestep in the diffusion chain.
|
255
257
|
|
256
258
|
Returns:
|
257
|
-
`torch.FloatTensor`:
|
259
|
+
`torch.FloatTensor`:
|
260
|
+
A scaled input sample.
|
258
261
|
"""
|
259
262
|
return sample
|
260
263
|
|
@@ -320,11 +323,11 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
320
323
|
# Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler.set_timesteps
|
321
324
|
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
322
325
|
"""
|
323
|
-
Sets the discrete timesteps used for the diffusion chain
|
326
|
+
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
324
327
|
|
325
328
|
Args:
|
326
329
|
num_inference_steps (`int`):
|
327
|
-
|
330
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
328
331
|
"""
|
329
332
|
|
330
333
|
if num_inference_steps > self.config.num_train_timesteps:
|
@@ -29,14 +29,14 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
|
|
29
29
|
@dataclass
|
30
30
|
class DDPMSchedulerOutput(BaseOutput):
|
31
31
|
"""
|
32
|
-
Output class for the scheduler's step function output.
|
32
|
+
Output class for the scheduler's `step` function output.
|
33
33
|
|
34
34
|
Args:
|
35
35
|
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
36
|
-
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
|
36
|
+
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
|
37
37
|
denoising loop.
|
38
38
|
pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
39
|
-
The predicted denoised sample (x_{0}) based on the model output from the current timestep.
|
39
|
+
The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
|
40
40
|
`pred_original_sample` can be used to preview progress or for guidance.
|
41
41
|
"""
|
42
42
|
|
@@ -90,52 +90,46 @@ def betas_for_alpha_bar(
|
|
90
90
|
|
91
91
|
class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
92
92
|
"""
|
93
|
-
|
94
|
-
Langevin dynamics sampling.
|
93
|
+
`DDPMScheduler` explores the connections between denoising score matching and Langevin dynamics sampling.
|
95
94
|
|
96
|
-
|
97
|
-
|
98
|
-
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
99
|
-
[`~SchedulerMixin.from_pretrained`] functions.
|
100
|
-
|
101
|
-
For more details, see the original paper: https://arxiv.org/abs/2006.11239
|
95
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
96
|
+
methods the library implements for all schedulers such as loading and saving.
|
102
97
|
|
103
98
|
Args:
|
104
|
-
num_train_timesteps (`int
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
dynamic_thresholding_ratio (`float`,
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
stable diffusion.
|
99
|
+
num_train_timesteps (`int`, defaults to 1000):
|
100
|
+
The number of diffusion steps to train the model.
|
101
|
+
beta_start (`float`, defaults to 0.0001):
|
102
|
+
The starting `beta` value of inference.
|
103
|
+
beta_end (`float`, defaults to 0.02):
|
104
|
+
The final `beta` value.
|
105
|
+
beta_schedule (`str`, defaults to `"linear"`):
|
106
|
+
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
|
107
|
+
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
|
108
|
+
variance_type (`str`, defaults to `"fixed_small"`):
|
109
|
+
Clip the variance when adding noise to the denoised sample. Choose from `fixed_small`, `fixed_small_log`,
|
110
|
+
`fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
|
111
|
+
clip_sample (`bool`, defaults to `True`):
|
112
|
+
Clip the predicted sample for numerical stability.
|
113
|
+
clip_sample_range (`float`, defaults to 1.0):
|
114
|
+
The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
|
115
|
+
prediction_type (`str`, defaults to `epsilon`, *optional*):
|
116
|
+
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
|
117
|
+
`sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
|
118
|
+
Video](https://imagen.research.google/video/paper.pdf) paper).
|
119
|
+
thresholding (`bool`, defaults to `False`):
|
120
|
+
Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
|
121
|
+
as Stable Diffusion.
|
122
|
+
dynamic_thresholding_ratio (`float`, defaults to 0.995):
|
123
|
+
The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
|
124
|
+
sample_max_value (`float`, defaults to 1.0):
|
125
|
+
The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
|
126
|
+
timestep_spacing (`str`, defaults to `"leading"`):
|
127
|
+
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
128
|
+
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
129
|
+
steps_offset (`int`, defaults to 0):
|
130
|
+
An offset added to the inference steps. You can use a combination of `offset=1` and
|
131
|
+
`set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
|
132
|
+
Diffusion.
|
139
133
|
"""
|
140
134
|
|
141
135
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -198,11 +192,14 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
198
192
|
current timestep.
|
199
193
|
|
200
194
|
Args:
|
201
|
-
sample (`torch.FloatTensor`):
|
202
|
-
|
195
|
+
sample (`torch.FloatTensor`):
|
196
|
+
The input sample.
|
197
|
+
timestep (`int`, *optional*):
|
198
|
+
The current timestep in the diffusion chain.
|
203
199
|
|
204
200
|
Returns:
|
205
|
-
`torch.FloatTensor`:
|
201
|
+
`torch.FloatTensor`:
|
202
|
+
A scaled input sample.
|
206
203
|
"""
|
207
204
|
return sample
|
208
205
|
|
@@ -213,18 +210,18 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
213
210
|
timesteps: Optional[List[int]] = None,
|
214
211
|
):
|
215
212
|
"""
|
216
|
-
Sets the discrete timesteps used for the diffusion chain
|
213
|
+
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
217
214
|
|
218
215
|
Args:
|
219
|
-
num_inference_steps (`
|
220
|
-
|
216
|
+
num_inference_steps (`int`):
|
217
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
221
218
|
`timesteps` must be `None`.
|
222
|
-
device (`str` or `torch.device`, optional):
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
timestep spacing strategy of equal spacing between timesteps is used. If passed,
|
227
|
-
must be `None`.
|
219
|
+
device (`str` or `torch.device`, *optional*):
|
220
|
+
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
221
|
+
timesteps (`List[int]`, *optional*):
|
222
|
+
Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
|
223
|
+
timestep spacing strategy of equal spacing between timesteps is used. If `timesteps` is passed,
|
224
|
+
`num_inference_steps` must be `None`.
|
228
225
|
|
229
226
|
"""
|
230
227
|
if num_inference_steps is not None and timesteps is not None:
|
@@ -364,21 +361,25 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
364
361
|
return_dict: bool = True,
|
365
362
|
) -> Union[DDPMSchedulerOutput, Tuple]:
|
366
363
|
"""
|
367
|
-
Predict the sample
|
364
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
|
368
365
|
process from the learned model outputs (most often the predicted noise).
|
369
366
|
|
370
367
|
Args:
|
371
|
-
model_output (`torch.FloatTensor`):
|
372
|
-
|
368
|
+
model_output (`torch.FloatTensor`):
|
369
|
+
The direct output from the learned diffusion model.
|
370
|
+
timestep (`float`):
|
371
|
+
The current discrete timestep in the diffusion chain.
|
373
372
|
sample (`torch.FloatTensor`):
|
374
|
-
current instance of sample
|
375
|
-
generator
|
376
|
-
|
373
|
+
A current instance of a sample created by the diffusion process.
|
374
|
+
generator (`torch.Generator`, *optional*):
|
375
|
+
A random number generator.
|
376
|
+
return_dict (`bool`, *optional*, defaults to `True`):
|
377
|
+
Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`.
|
377
378
|
|
378
379
|
Returns:
|
379
|
-
[`~schedulers.
|
380
|
-
|
381
|
-
|
380
|
+
[`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`:
|
381
|
+
If return_dict is `True`, [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] is returned, otherwise a
|
382
|
+
tuple is returned where the first element is the sample tensor.
|
382
383
|
|
383
384
|
"""
|
384
385
|
t = timestep
|
@@ -30,14 +30,14 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
|
|
30
30
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput
|
31
31
|
class DDPMParallelSchedulerOutput(BaseOutput):
|
32
32
|
"""
|
33
|
-
Output class for the scheduler's step function output.
|
33
|
+
Output class for the scheduler's `step` function output.
|
34
34
|
|
35
35
|
Args:
|
36
36
|
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
37
|
-
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
|
37
|
+
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
|
38
38
|
denoising loop.
|
39
39
|
pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
40
|
-
The predicted denoised sample (x_{0}) based on the model output from the current timestep.
|
40
|
+
The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
|
41
41
|
`pred_original_sample` can be used to preview progress or for guidance.
|
42
42
|
"""
|
43
43
|
|
@@ -203,11 +203,14 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
203
203
|
current timestep.
|
204
204
|
|
205
205
|
Args:
|
206
|
-
sample (`torch.FloatTensor`):
|
207
|
-
|
206
|
+
sample (`torch.FloatTensor`):
|
207
|
+
The input sample.
|
208
|
+
timestep (`int`, *optional*):
|
209
|
+
The current timestep in the diffusion chain.
|
208
210
|
|
209
211
|
Returns:
|
210
|
-
`torch.FloatTensor`:
|
212
|
+
`torch.FloatTensor`:
|
213
|
+
A scaled input sample.
|
211
214
|
"""
|
212
215
|
return sample
|
213
216
|
|
@@ -219,18 +222,18 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
219
222
|
timesteps: Optional[List[int]] = None,
|
220
223
|
):
|
221
224
|
"""
|
222
|
-
Sets the discrete timesteps used for the diffusion chain
|
225
|
+
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
223
226
|
|
224
227
|
Args:
|
225
|
-
num_inference_steps (`
|
226
|
-
|
228
|
+
num_inference_steps (`int`):
|
229
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
227
230
|
`timesteps` must be `None`.
|
228
|
-
device (`str` or `torch.device`, optional):
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
timestep spacing strategy of equal spacing between timesteps is used. If passed,
|
233
|
-
must be `None`.
|
231
|
+
device (`str` or `torch.device`, *optional*):
|
232
|
+
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
233
|
+
timesteps (`List[int]`, *optional*):
|
234
|
+
Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
|
235
|
+
timestep spacing strategy of equal spacing between timesteps is used. If `timesteps` is passed,
|
236
|
+
`num_inference_steps` must be `None`.
|
234
237
|
|
235
238
|
"""
|
236
239
|
if num_inference_steps is not None and timesteps is not None:
|
@@ -72,63 +72,51 @@ def betas_for_alpha_bar(
|
|
72
72
|
|
73
73
|
class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
74
74
|
"""
|
75
|
-
|
76
|
-
polynomial fitting formula in log-rho space instead of the original linear t space in DEIS paper. The modification
|
77
|
-
enjoys closed-form coefficients for exponential multistep update instead of replying on the numerical solver. More
|
78
|
-
variants of DEIS can be found in https://github.com/qsh-zh/deis.
|
75
|
+
`DEISMultistepScheduler` is a fast high order solver for diffusion ordinary differential equations (ODEs).
|
79
76
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
We also support the "dynamic thresholding" method in Imagen (https://arxiv.org/abs/2205.11487). For pixel-space
|
84
|
-
diffusion models, you can set `thresholding=True` to use the dynamic thresholding.
|
85
|
-
|
86
|
-
[`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
|
87
|
-
function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
|
88
|
-
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
89
|
-
[`~SchedulerMixin.from_pretrained`] functions.
|
77
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
78
|
+
methods the library implements for all schedulers such as loading and saving.
|
90
79
|
|
91
80
|
Args:
|
92
|
-
num_train_timesteps (`int
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
81
|
+
num_train_timesteps (`int`, defaults to 1000):
|
82
|
+
The number of diffusion steps to train the model.
|
83
|
+
beta_start (`float`, defaults to 0.0001):
|
84
|
+
The starting `beta` value of inference.
|
85
|
+
beta_end (`float`, defaults to 0.02):
|
86
|
+
The final `beta` value.
|
87
|
+
beta_schedule (`str`, defaults to `"linear"`):
|
88
|
+
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
|
97
89
|
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
|
98
|
-
trained_betas (`np.ndarray`, optional):
|
99
|
-
|
100
|
-
solver_order (`int`,
|
101
|
-
|
102
|
-
`solver_order=3` for unconditional sampling.
|
103
|
-
prediction_type (`str`,
|
104
|
-
|
105
|
-
or `
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
dynamic_thresholding_ratio (`float`,
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
the
|
118
|
-
lower_order_final (`bool`, default `True`):
|
119
|
-
whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. We empirically
|
120
|
-
find this trick can stabilize the sampling of DEIS for steps < 15, especially for steps <= 10.
|
90
|
+
trained_betas (`np.ndarray`, *optional*):
|
91
|
+
Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
|
92
|
+
solver_order (`int`, defaults to 2):
|
93
|
+
The DEIS order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided
|
94
|
+
sampling, and `solver_order=3` for unconditional sampling.
|
95
|
+
prediction_type (`str`, defaults to `epsilon`):
|
96
|
+
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
|
97
|
+
`sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
|
98
|
+
Video](https://imagen.research.google/video/paper.pdf) paper).
|
99
|
+
thresholding (`bool`, defaults to `False`):
|
100
|
+
Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
|
101
|
+
as Stable Diffusion.
|
102
|
+
dynamic_thresholding_ratio (`float`, defaults to 0.995):
|
103
|
+
The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
|
104
|
+
sample_max_value (`float`, defaults to 1.0):
|
105
|
+
The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
|
106
|
+
algorithm_type (`str`, defaults to `deis`):
|
107
|
+
The algorithm type for the solver.
|
108
|
+
lower_order_final (`bool`, defaults to `True`):
|
109
|
+
Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps.
|
121
110
|
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
stable diffusion.
|
111
|
+
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
112
|
+
the sigmas are determined according to a sequence of noise levels {σi}.
|
113
|
+
timestep_spacing (`str`, defaults to `"linspace"`):
|
114
|
+
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
115
|
+
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
116
|
+
steps_offset (`int`, defaults to 0):
|
117
|
+
An offset added to the inference steps. You can use a combination of `offset=1` and
|
118
|
+
`set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
|
119
|
+
Diffusion.
|
132
120
|
"""
|
133
121
|
|
134
122
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -201,13 +189,13 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
201
189
|
|
202
190
|
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
203
191
|
"""
|
204
|
-
Sets the timesteps used for the diffusion chain
|
192
|
+
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
205
193
|
|
206
194
|
Args:
|
207
195
|
num_inference_steps (`int`):
|
208
|
-
|
209
|
-
device (`str` or `torch.device`, optional):
|
210
|
-
|
196
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
197
|
+
device (`str` or `torch.device`, *optional*):
|
198
|
+
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
211
199
|
"""
|
212
200
|
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
|
213
201
|
if self.config.timestep_spacing == "linspace":
|
@@ -296,16 +284,19 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
296
284
|
self, model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor
|
297
285
|
) -> torch.FloatTensor:
|
298
286
|
"""
|
299
|
-
Convert the model output to the corresponding type
|
287
|
+
Convert the model output to the corresponding type the DEIS algorithm needs.
|
300
288
|
|
301
289
|
Args:
|
302
|
-
model_output (`torch.FloatTensor`):
|
303
|
-
|
290
|
+
model_output (`torch.FloatTensor`):
|
291
|
+
The direct output from the learned diffusion model.
|
292
|
+
timestep (`int`):
|
293
|
+
The current discrete timestep in the diffusion chain.
|
304
294
|
sample (`torch.FloatTensor`):
|
305
|
-
current instance of sample
|
295
|
+
A current instance of a sample created by the diffusion process.
|
306
296
|
|
307
297
|
Returns:
|
308
|
-
`torch.FloatTensor`:
|
298
|
+
`torch.FloatTensor`:
|
299
|
+
The converted model output.
|
309
300
|
"""
|
310
301
|
if self.config.prediction_type == "epsilon":
|
311
302
|
alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep]
|
@@ -341,14 +332,18 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
341
332
|
One step for the first-order DEIS (equivalent to DDIM).
|
342
333
|
|
343
334
|
Args:
|
344
|
-
model_output (`torch.FloatTensor`):
|
345
|
-
|
346
|
-
|
335
|
+
model_output (`torch.FloatTensor`):
|
336
|
+
The direct output from the learned diffusion model.
|
337
|
+
timestep (`int`):
|
338
|
+
The current discrete timestep in the diffusion chain.
|
339
|
+
prev_timestep (`int`):
|
340
|
+
The previous discrete timestep in the diffusion chain.
|
347
341
|
sample (`torch.FloatTensor`):
|
348
|
-
current instance of sample
|
342
|
+
A current instance of a sample created by the diffusion process.
|
349
343
|
|
350
344
|
Returns:
|
351
|
-
`torch.FloatTensor`:
|
345
|
+
`torch.FloatTensor`:
|
346
|
+
The sample tensor at the previous timestep.
|
352
347
|
"""
|
353
348
|
lambda_t, lambda_s = self.lambda_t[prev_timestep], self.lambda_t[timestep]
|
354
349
|
alpha_t, alpha_s = self.alpha_t[prev_timestep], self.alpha_t[timestep]
|
@@ -372,14 +367,17 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
372
367
|
|
373
368
|
Args:
|
374
369
|
model_output_list (`List[torch.FloatTensor]`):
|
375
|
-
direct outputs from learned diffusion model at current and latter timesteps.
|
376
|
-
timestep (`int`):
|
377
|
-
|
370
|
+
The direct outputs from learned diffusion model at current and latter timesteps.
|
371
|
+
timestep (`int`):
|
372
|
+
The current and latter discrete timestep in the diffusion chain.
|
373
|
+
prev_timestep (`int`):
|
374
|
+
The previous discrete timestep in the diffusion chain.
|
378
375
|
sample (`torch.FloatTensor`):
|
379
|
-
current instance of sample
|
376
|
+
A current instance of a sample created by the diffusion process.
|
380
377
|
|
381
378
|
Returns:
|
382
|
-
`torch.FloatTensor`:
|
379
|
+
`torch.FloatTensor`:
|
380
|
+
The sample tensor at the previous timestep.
|
383
381
|
"""
|
384
382
|
t, s0, s1 = prev_timestep, timestep_list[-1], timestep_list[-2]
|
385
383
|
m0, m1 = model_output_list[-1], model_output_list[-2]
|
@@ -414,14 +412,17 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
414
412
|
|
415
413
|
Args:
|
416
414
|
model_output_list (`List[torch.FloatTensor]`):
|
417
|
-
direct outputs from learned diffusion model at current and latter timesteps.
|
418
|
-
timestep (`int`):
|
419
|
-
|
415
|
+
The direct outputs from learned diffusion model at current and latter timesteps.
|
416
|
+
timestep (`int`):
|
417
|
+
The current and latter discrete timestep in the diffusion chain.
|
418
|
+
prev_timestep (`int`):
|
419
|
+
The previous discrete timestep in the diffusion chain.
|
420
420
|
sample (`torch.FloatTensor`):
|
421
|
-
current instance of sample
|
421
|
+
A current instance of a sample created by the diffusion process.
|
422
422
|
|
423
423
|
Returns:
|
424
|
-
`torch.FloatTensor`:
|
424
|
+
`torch.FloatTensor`:
|
425
|
+
The sample tensor at the previous timestep.
|
425
426
|
"""
|
426
427
|
t, s0, s1, s2 = prev_timestep, timestep_list[-1], timestep_list[-2], timestep_list[-3]
|
427
428
|
m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3]
|
@@ -467,18 +468,23 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
467
468
|
return_dict: bool = True,
|
468
469
|
) -> Union[SchedulerOutput, Tuple]:
|
469
470
|
"""
|
470
|
-
|
471
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
|
472
|
+
the multistep DEIS.
|
471
473
|
|
472
474
|
Args:
|
473
|
-
model_output (`torch.FloatTensor`):
|
474
|
-
|
475
|
+
model_output (`torch.FloatTensor`):
|
476
|
+
The direct output from the learned diffusion model.
|
477
|
+
timestep (`float`):
|
478
|
+
The current discrete timestep in the diffusion chain.
|
475
479
|
sample (`torch.FloatTensor`):
|
476
|
-
current instance of sample
|
477
|
-
return_dict (`bool`):
|
480
|
+
A current instance of a sample created by the diffusion process.
|
481
|
+
return_dict (`bool`):
|
482
|
+
Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.
|
478
483
|
|
479
484
|
Returns:
|
480
|
-
[`~scheduling_utils.SchedulerOutput`] or `tuple`:
|
481
|
-
|
485
|
+
[`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
|
486
|
+
If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
|
487
|
+
tuple is returned where the first element is the sample tensor.
|
482
488
|
|
483
489
|
"""
|
484
490
|
if self.num_inference_steps is None:
|
@@ -533,10 +539,12 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
533
539
|
current timestep.
|
534
540
|
|
535
541
|
Args:
|
536
|
-
sample (`torch.FloatTensor`):
|
542
|
+
sample (`torch.FloatTensor`):
|
543
|
+
The input sample.
|
537
544
|
|
538
545
|
Returns:
|
539
|
-
`torch.FloatTensor`:
|
546
|
+
`torch.FloatTensor`:
|
547
|
+
A scaled input sample.
|
540
548
|
"""
|
541
549
|
return sample
|
542
550
|
|