diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -31,14 +31,14 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
31
31
  # Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput
32
32
  class DDIMParallelSchedulerOutput(BaseOutput):
33
33
  """
34
- Output class for the scheduler's step function output.
34
+ Output class for the scheduler's `step` function output.
35
35
 
36
36
  Args:
37
37
  prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
38
- Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
38
+ Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
39
39
  denoising loop.
40
40
  pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
41
- The predicted denoised sample (x_{0}) based on the model output from the current timestep.
41
+ The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
42
42
  `pred_original_sample` can be used to preview progress or for guidance.
43
43
  """
44
44
 
@@ -250,11 +250,14 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
250
250
  current timestep.
251
251
 
252
252
  Args:
253
- sample (`torch.FloatTensor`): input sample
254
- timestep (`int`, optional): current timestep
253
+ sample (`torch.FloatTensor`):
254
+ The input sample.
255
+ timestep (`int`, *optional*):
256
+ The current timestep in the diffusion chain.
255
257
 
256
258
  Returns:
257
- `torch.FloatTensor`: scaled input sample
259
+ `torch.FloatTensor`:
260
+ A scaled input sample.
258
261
  """
259
262
  return sample
260
263
 
@@ -320,11 +323,11 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
320
323
  # Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler.set_timesteps
321
324
  def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
322
325
  """
323
- Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
326
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
324
327
 
325
328
  Args:
326
329
  num_inference_steps (`int`):
327
- the number of diffusion steps used when generating samples with a pre-trained model.
330
+ The number of diffusion steps used when generating samples with a pre-trained model.
328
331
  """
329
332
 
330
333
  if num_inference_steps > self.config.num_train_timesteps:
@@ -29,14 +29,14 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
29
29
  @dataclass
30
30
  class DDPMSchedulerOutput(BaseOutput):
31
31
  """
32
- Output class for the scheduler's step function output.
32
+ Output class for the scheduler's `step` function output.
33
33
 
34
34
  Args:
35
35
  prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
36
- Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
36
+ Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
37
37
  denoising loop.
38
38
  pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
39
- The predicted denoised sample (x_{0}) based on the model output from the current timestep.
39
+ The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
40
40
  `pred_original_sample` can be used to preview progress or for guidance.
41
41
  """
42
42
 
@@ -90,52 +90,46 @@ def betas_for_alpha_bar(
90
90
 
91
91
  class DDPMScheduler(SchedulerMixin, ConfigMixin):
92
92
  """
93
- Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and
94
- Langevin dynamics sampling.
93
+ `DDPMScheduler` explores the connections between denoising score matching and Langevin dynamics sampling.
95
94
 
96
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
97
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
98
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
99
- [`~SchedulerMixin.from_pretrained`] functions.
100
-
101
- For more details, see the original paper: https://arxiv.org/abs/2006.11239
95
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
96
+ methods the library implements for all schedulers such as loading and saving.
102
97
 
103
98
  Args:
104
- num_train_timesteps (`int`): number of diffusion steps used to train the model.
105
- beta_start (`float`): the starting `beta` value of inference.
106
- beta_end (`float`): the final `beta` value.
107
- beta_schedule (`str`):
108
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
109
- `linear`, `scaled_linear`, `squaredcos_cap_v2` or `sigmoid`.
110
- trained_betas (`np.ndarray`, optional):
111
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
112
- variance_type (`str`):
113
- options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`,
114
- `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
115
- clip_sample (`bool`, default `True`):
116
- option to clip predicted sample for numerical stability.
117
- clip_sample_range (`float`, default `1.0`):
118
- the maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
119
- prediction_type (`str`, default `epsilon`, optional):
120
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
121
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
122
- https://imagen.research.google/video/paper.pdf)
123
- thresholding (`bool`, default `False`):
124
- whether to use the "dynamic thresholding" method (introduced by Imagen, https://arxiv.org/abs/2205.11487).
125
- Note that the thresholding method is unsuitable for latent-space diffusion models (such as
126
- stable-diffusion).
127
- dynamic_thresholding_ratio (`float`, default `0.995`):
128
- the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
129
- (https://arxiv.org/abs/2205.11487). Valid only when `thresholding=True`.
130
- sample_max_value (`float`, default `1.0`):
131
- the threshold value for dynamic thresholding. Valid only when `thresholding=True`.
132
- timestep_spacing (`str`, default `"leading"`):
133
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
134
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
135
- steps_offset (`int`, default `0`):
136
- an offset added to the inference steps. You can use a combination of `offset=1` and
137
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
138
- stable diffusion.
99
+ num_train_timesteps (`int`, defaults to 1000):
100
+ The number of diffusion steps to train the model.
101
+ beta_start (`float`, defaults to 0.0001):
102
+ The starting `beta` value of inference.
103
+ beta_end (`float`, defaults to 0.02):
104
+ The final `beta` value.
105
+ beta_schedule (`str`, defaults to `"linear"`):
106
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
107
+ `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
108
+ variance_type (`str`, defaults to `"fixed_small"`):
109
+ Clip the variance when adding noise to the denoised sample. Choose from `fixed_small`, `fixed_small_log`,
110
+ `fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
111
+ clip_sample (`bool`, defaults to `True`):
112
+ Clip the predicted sample for numerical stability.
113
+ clip_sample_range (`float`, defaults to 1.0):
114
+ The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
115
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
116
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
117
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
118
+ Video](https://imagen.research.google/video/paper.pdf) paper).
119
+ thresholding (`bool`, defaults to `False`):
120
+ Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
121
+ as Stable Diffusion.
122
+ dynamic_thresholding_ratio (`float`, defaults to 0.995):
123
+ The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
124
+ sample_max_value (`float`, defaults to 1.0):
125
+ The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
126
+ timestep_spacing (`str`, defaults to `"leading"`):
127
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
128
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
129
+ steps_offset (`int`, defaults to 0):
130
+ An offset added to the inference steps. You can use a combination of `offset=1` and
131
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
132
+ Diffusion.
139
133
  """
140
134
 
141
135
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -198,11 +192,14 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
198
192
  current timestep.
199
193
 
200
194
  Args:
201
- sample (`torch.FloatTensor`): input sample
202
- timestep (`int`, optional): current timestep
195
+ sample (`torch.FloatTensor`):
196
+ The input sample.
197
+ timestep (`int`, *optional*):
198
+ The current timestep in the diffusion chain.
203
199
 
204
200
  Returns:
205
- `torch.FloatTensor`: scaled input sample
201
+ `torch.FloatTensor`:
202
+ A scaled input sample.
206
203
  """
207
204
  return sample
208
205
 
@@ -213,18 +210,18 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
213
210
  timesteps: Optional[List[int]] = None,
214
211
  ):
215
212
  """
216
- Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
213
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
217
214
 
218
215
  Args:
219
- num_inference_steps (`Optional[int]`):
220
- the number of diffusion steps used when generating samples with a pre-trained model. If passed, then
216
+ num_inference_steps (`int`):
217
+ The number of diffusion steps used when generating samples with a pre-trained model. If used,
221
218
  `timesteps` must be `None`.
222
- device (`str` or `torch.device`, optional):
223
- the device to which the timesteps are moved to.
224
- custom_timesteps (`List[int]`, optional):
225
- custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
226
- timestep spacing strategy of equal spacing between timesteps is used. If passed, `num_inference_steps`
227
- must be `None`.
219
+ device (`str` or `torch.device`, *optional*):
220
+ The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
221
+ timesteps (`List[int]`, *optional*):
222
+ Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
223
+ timestep spacing strategy of equal spacing between timesteps is used. If `timesteps` is passed,
224
+ `num_inference_steps` must be `None`.
228
225
 
229
226
  """
230
227
  if num_inference_steps is not None and timesteps is not None:
@@ -364,21 +361,25 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
364
361
  return_dict: bool = True,
365
362
  ) -> Union[DDPMSchedulerOutput, Tuple]:
366
363
  """
367
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
364
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
368
365
  process from the learned model outputs (most often the predicted noise).
369
366
 
370
367
  Args:
371
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
372
- timestep (`int`): current discrete timestep in the diffusion chain.
368
+ model_output (`torch.FloatTensor`):
369
+ The direct output from learned diffusion model.
370
+ timestep (`int`):
371
+ The current discrete timestep in the diffusion chain.
373
372
  sample (`torch.FloatTensor`):
374
- current instance of sample being created by diffusion process.
375
- generator: random number generator.
376
- return_dict (`bool`): option for returning tuple rather than DDPMSchedulerOutput class
373
+ A current instance of a sample created by the diffusion process.
374
+ generator (`torch.Generator`, *optional*):
375
+ A random number generator.
376
+ return_dict (`bool`, *optional*, defaults to `True`):
377
+ Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`.
377
378
 
378
379
  Returns:
379
- [`~schedulers.scheduling_utils.DDPMSchedulerOutput`] or `tuple`:
380
- [`~schedulers.scheduling_utils.DDPMSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When
381
- returning a tuple, the first element is the sample tensor.
380
+ [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`:
381
+ If `return_dict` is `True`, [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] is returned, otherwise a
382
+ tuple is returned where the first element is the sample tensor.
382
383
 
383
384
  """
384
385
  t = timestep
@@ -30,14 +30,14 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
30
30
  # Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput
31
31
  class DDPMParallelSchedulerOutput(BaseOutput):
32
32
  """
33
- Output class for the scheduler's step function output.
33
+ Output class for the scheduler's `step` function output.
34
34
 
35
35
  Args:
36
36
  prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
37
- Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
37
+ Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
38
38
  denoising loop.
39
39
  pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
40
- The predicted denoised sample (x_{0}) based on the model output from the current timestep.
40
+ The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
41
41
  `pred_original_sample` can be used to preview progress or for guidance.
42
42
  """
43
43
 
@@ -203,11 +203,14 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
203
203
  current timestep.
204
204
 
205
205
  Args:
206
- sample (`torch.FloatTensor`): input sample
207
- timestep (`int`, optional): current timestep
206
+ sample (`torch.FloatTensor`):
207
+ The input sample.
208
+ timestep (`int`, *optional*):
209
+ The current timestep in the diffusion chain.
208
210
 
209
211
  Returns:
210
- `torch.FloatTensor`: scaled input sample
212
+ `torch.FloatTensor`:
213
+ A scaled input sample.
211
214
  """
212
215
  return sample
213
216
 
@@ -219,18 +222,18 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
219
222
  timesteps: Optional[List[int]] = None,
220
223
  ):
221
224
  """
222
- Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
225
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
223
226
 
224
227
  Args:
225
- num_inference_steps (`Optional[int]`):
226
- the number of diffusion steps used when generating samples with a pre-trained model. If passed, then
228
+ num_inference_steps (`int`):
229
+ The number of diffusion steps used when generating samples with a pre-trained model. If used,
227
230
  `timesteps` must be `None`.
228
- device (`str` or `torch.device`, optional):
229
- the device to which the timesteps are moved to.
230
- custom_timesteps (`List[int]`, optional):
231
- custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
232
- timestep spacing strategy of equal spacing between timesteps is used. If passed, `num_inference_steps`
233
- must be `None`.
231
+ device (`str` or `torch.device`, *optional*):
232
+ The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
233
+ timesteps (`List[int]`, *optional*):
234
+ Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
235
+ timestep spacing strategy of equal spacing between timesteps is used. If `timesteps` is passed,
236
+ `num_inference_steps` must be `None`.
234
237
 
235
238
  """
236
239
  if num_inference_steps is not None and timesteps is not None:
@@ -72,63 +72,51 @@ def betas_for_alpha_bar(
72
72
 
73
73
  class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
74
74
  """
75
- DEIS (https://arxiv.org/abs/2204.13902) is a fast high order solver for diffusion ODEs. We slightly modify the
76
- polynomial fitting formula in log-rho space instead of the original linear t space in DEIS paper. The modification
77
- enjoys closed-form coefficients for exponential multistep update instead of replying on the numerical solver. More
78
- variants of DEIS can be found in https://github.com/qsh-zh/deis.
75
+ `DEISMultistepScheduler` is a fast high order solver for diffusion ordinary differential equations (ODEs).
79
76
 
80
- Currently, we support the log-rho multistep DEIS. We recommend to use `solver_order=2 / 3` while `solver_order=1`
81
- reduces to DDIM.
82
-
83
- We also support the "dynamic thresholding" method in Imagen (https://arxiv.org/abs/2205.11487). For pixel-space
84
- diffusion models, you can set `thresholding=True` to use the dynamic thresholding.
85
-
86
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
87
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
88
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
89
- [`~SchedulerMixin.from_pretrained`] functions.
77
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
78
+ methods the library implements for all schedulers such as loading and saving.
90
79
 
91
80
  Args:
92
- num_train_timesteps (`int`): number of diffusion steps used to train the model.
93
- beta_start (`float`): the starting `beta` value of inference.
94
- beta_end (`float`): the final `beta` value.
95
- beta_schedule (`str`):
96
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
81
+ num_train_timesteps (`int`, defaults to 1000):
82
+ The number of diffusion steps to train the model.
83
+ beta_start (`float`, defaults to 0.0001):
84
+ The starting `beta` value of inference.
85
+ beta_end (`float`, defaults to 0.02):
86
+ The final `beta` value.
87
+ beta_schedule (`str`, defaults to `"linear"`):
88
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
97
89
  `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
98
- trained_betas (`np.ndarray`, optional):
99
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
100
- solver_order (`int`, default `2`):
101
- the order of DEIS; can be `1` or `2` or `3`. We recommend to use `solver_order=2` for guided sampling, and
102
- `solver_order=3` for unconditional sampling.
103
- prediction_type (`str`, default `epsilon`):
104
- indicates whether the model predicts the noise (epsilon), or the data / `x0`. One of `epsilon`, `sample`,
105
- or `v-prediction`.
106
- thresholding (`bool`, default `False`):
107
- whether to use the "dynamic thresholding" method (introduced by Imagen, https://arxiv.org/abs/2205.11487).
108
- Note that the thresholding method is unsuitable for latent-space diffusion models (such as
109
- stable-diffusion).
110
- dynamic_thresholding_ratio (`float`, default `0.995`):
111
- the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
112
- (https://arxiv.org/abs/2205.11487).
113
- sample_max_value (`float`, default `1.0`):
114
- the threshold value for dynamic thresholding. Valid only when `thresholding=True`
115
- algorithm_type (`str`, default `deis`):
116
- the algorithm type for the solver. current we support multistep deis, we will add other variants of DEIS in
117
- the future
118
- lower_order_final (`bool`, default `True`):
119
- whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. We empirically
120
- find this trick can stabilize the sampling of DEIS for steps < 15, especially for steps <= 10.
90
+ trained_betas (`np.ndarray`, *optional*):
91
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
92
+ solver_order (`int`, defaults to 2):
93
+ The DEIS order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided
94
+ sampling, and `solver_order=3` for unconditional sampling.
95
+ prediction_type (`str`, defaults to `epsilon`):
96
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
97
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
98
+ Video](https://imagen.research.google/video/paper.pdf) paper).
99
+ thresholding (`bool`, defaults to `False`):
100
+ Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
101
+ as Stable Diffusion.
102
+ dynamic_thresholding_ratio (`float`, defaults to 0.995):
103
+ The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
104
+ sample_max_value (`float`, defaults to 1.0):
105
+ The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
106
+ algorithm_type (`str`, defaults to `deis`):
107
+ The algorithm type for the solver.
108
+ lower_order_final (`bool`, defaults to `True`):
109
+ Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps.
121
110
  use_karras_sigmas (`bool`, *optional*, defaults to `False`):
122
- This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
123
- noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
124
- of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
125
- timestep_spacing (`str`, default `"linspace"`):
126
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
127
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
128
- steps_offset (`int`, default `0`):
129
- an offset added to the inference steps. You can use a combination of `offset=1` and
130
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
131
- stable diffusion.
111
+ Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
112
+ the sigmas are determined according to a sequence of noise levels {σi}.
113
+ timestep_spacing (`str`, defaults to `"linspace"`):
114
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
115
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
116
+ steps_offset (`int`, defaults to 0):
117
+ An offset added to the inference steps. You can use a combination of `offset=1` and
118
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
119
+ Diffusion.
132
120
  """
133
121
 
134
122
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -201,13 +189,13 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
201
189
 
202
190
  def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
203
191
  """
204
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
192
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
205
193
 
206
194
  Args:
207
195
  num_inference_steps (`int`):
208
- the number of diffusion steps used when generating samples with a pre-trained model.
209
- device (`str` or `torch.device`, optional):
210
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
196
+ The number of diffusion steps used when generating samples with a pre-trained model.
197
+ device (`str` or `torch.device`, *optional*):
198
+ The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
211
199
  """
212
200
  # "linspace", "leading", "trailing" correspond to the annotation of Table 2 of https://arxiv.org/abs/2305.08891
213
201
  if self.config.timestep_spacing == "linspace":
@@ -296,16 +284,19 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
296
284
  self, model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor
297
285
  ) -> torch.FloatTensor:
298
286
  """
299
- Convert the model output to the corresponding type that the algorithm DEIS needs.
287
+ Convert the model output to the corresponding type the DEIS algorithm needs.
300
288
 
301
289
  Args:
302
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
303
- timestep (`int`): current discrete timestep in the diffusion chain.
290
+ model_output (`torch.FloatTensor`):
291
+ The direct output from the learned diffusion model.
292
+ timestep (`int`):
293
+ The current discrete timestep in the diffusion chain.
304
294
  sample (`torch.FloatTensor`):
305
- current instance of sample being created by diffusion process.
295
+ A current instance of a sample created by the diffusion process.
306
296
 
307
297
  Returns:
308
- `torch.FloatTensor`: the converted model output.
298
+ `torch.FloatTensor`:
299
+ The converted model output.
309
300
  """
310
301
  if self.config.prediction_type == "epsilon":
311
302
  alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep]
@@ -341,14 +332,18 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
341
332
  One step for the first-order DEIS (equivalent to DDIM).
342
333
 
343
334
  Args:
344
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
345
- timestep (`int`): current discrete timestep in the diffusion chain.
346
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
335
+ model_output (`torch.FloatTensor`):
336
+ The direct output from the learned diffusion model.
337
+ timestep (`int`):
338
+ The current discrete timestep in the diffusion chain.
339
+ prev_timestep (`int`):
340
+ The previous discrete timestep in the diffusion chain.
347
341
  sample (`torch.FloatTensor`):
348
- current instance of sample being created by diffusion process.
342
+ A current instance of a sample created by the diffusion process.
349
343
 
350
344
  Returns:
351
- `torch.FloatTensor`: the sample tensor at the previous timestep.
345
+ `torch.FloatTensor`:
346
+ The sample tensor at the previous timestep.
352
347
  """
353
348
  lambda_t, lambda_s = self.lambda_t[prev_timestep], self.lambda_t[timestep]
354
349
  alpha_t, alpha_s = self.alpha_t[prev_timestep], self.alpha_t[timestep]
@@ -372,14 +367,17 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
372
367
 
373
368
  Args:
374
369
  model_output_list (`List[torch.FloatTensor]`):
375
- direct outputs from learned diffusion model at current and latter timesteps.
376
- timestep (`int`): current and latter discrete timestep in the diffusion chain.
377
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
370
+ The direct outputs from the learned diffusion model at the current and latter timesteps.
371
+ timestep (`int`):
372
+ The current and latter discrete timestep in the diffusion chain.
373
+ prev_timestep (`int`):
374
+ The previous discrete timestep in the diffusion chain.
378
375
  sample (`torch.FloatTensor`):
379
- current instance of sample being created by diffusion process.
376
+ A current instance of a sample created by the diffusion process.
380
377
 
381
378
  Returns:
382
- `torch.FloatTensor`: the sample tensor at the previous timestep.
379
+ `torch.FloatTensor`:
380
+ The sample tensor at the previous timestep.
383
381
  """
384
382
  t, s0, s1 = prev_timestep, timestep_list[-1], timestep_list[-2]
385
383
  m0, m1 = model_output_list[-1], model_output_list[-2]
@@ -414,14 +412,17 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
414
412
 
415
413
  Args:
416
414
  model_output_list (`List[torch.FloatTensor]`):
417
- direct outputs from learned diffusion model at current and latter timesteps.
418
- timestep (`int`): current and latter discrete timestep in the diffusion chain.
419
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
415
+ The direct outputs from the learned diffusion model at the current and latter timesteps.
416
+ timestep (`int`):
417
+ The current and latter discrete timestep in the diffusion chain.
418
+ prev_timestep (`int`):
419
+ The previous discrete timestep in the diffusion chain.
420
420
  sample (`torch.FloatTensor`):
421
- current instance of sample being created by diffusion process.
421
+ A current instance of a sample created by the diffusion process.
422
422
 
423
423
  Returns:
424
- `torch.FloatTensor`: the sample tensor at the previous timestep.
424
+ `torch.FloatTensor`:
425
+ The sample tensor at the previous timestep.
425
426
  """
426
427
  t, s0, s1, s2 = prev_timestep, timestep_list[-1], timestep_list[-2], timestep_list[-3]
427
428
  m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3]
@@ -467,18 +468,23 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
467
468
  return_dict: bool = True,
468
469
  ) -> Union[SchedulerOutput, Tuple]:
469
470
  """
470
- Step function propagating the sample with the multistep DEIS.
471
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
472
+ the multistep DEIS.
471
473
 
472
474
  Args:
473
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
474
- timestep (`int`): current discrete timestep in the diffusion chain.
475
+ model_output (`torch.FloatTensor`):
476
+ The direct output from the learned diffusion model.
477
+ timestep (`int`):
478
+ The current discrete timestep in the diffusion chain.
475
479
  sample (`torch.FloatTensor`):
476
- current instance of sample being created by diffusion process.
477
- return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
480
+ A current instance of a sample created by the diffusion process.
481
+ return_dict (`bool`):
482
+ Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.
478
483
 
479
484
  Returns:
480
- [`~scheduling_utils.SchedulerOutput`] or `tuple`: [`~scheduling_utils.SchedulerOutput`] if `return_dict` is
481
- True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
485
+ [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
486
+ If `return_dict` is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
487
+ tuple is returned where the first element is the sample tensor.
482
488
 
483
489
  """
484
490
  if self.num_inference_steps is None:
@@ -533,10 +539,12 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
533
539
  current timestep.
534
540
 
535
541
  Args:
536
- sample (`torch.FloatTensor`): input sample
542
+ sample (`torch.FloatTensor`):
543
+ The input sample.
537
544
 
538
545
  Returns:
539
- `torch.FloatTensor`: scaled input sample
546
+ `torch.FloatTensor`:
547
+ A scaled input sample.
540
548
  """
541
549
  return sample
542
550