diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -70,36 +70,35 @@ def betas_for_alpha_bar(
70
70
 
71
71
  class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
72
72
  """
73
- Scheduler created by @crowsonkb in [k_diffusion](https://github.com/crowsonkb/k-diffusion), see:
74
- https://github.com/crowsonkb/k-diffusion/blob/5b3af030dd83e0297272d861c19477735d0317ec/k_diffusion/sampling.py#L188
73
+ KDPM2DiscreteScheduler is inspired by the DPMSolver2 and Algorithm 2 from the [Elucidating the Design Space of
74
+ Diffusion-Based Generative Models](https://huggingface.co/papers/2206.00364) paper.
75
75
 
76
- Scheduler inspired by DPM-Solver-2 and Algorthim 2 from Karras et al. (2022).
77
-
78
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
79
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
80
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
81
- [`~SchedulerMixin.from_pretrained`] functions.
76
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
77
+ methods the library implements for all schedulers such as loading and saving.
82
78
 
83
79
  Args:
84
- num_train_timesteps (`int`): number of diffusion steps used to train the model. beta_start (`float`): the
85
- starting `beta` value of inference. beta_end (`float`): the final `beta` value. beta_schedule (`str`):
86
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
80
+ num_train_timesteps (`int`, defaults to 1000):
81
+ The number of diffusion steps to train the model.
82
+ beta_start (`float`, defaults to 0.00085):
83
+ The starting `beta` value of inference.
84
+ beta_end (`float`, defaults to 0.012):
85
+ The final `beta` value.
86
+ beta_schedule (`str`, defaults to `"linear"`):
87
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
87
88
  `linear` or `scaled_linear`.
88
- trained_betas (`np.ndarray`, optional):
89
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
90
- options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`,
91
- `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
92
- prediction_type (`str`, default `epsilon`, optional):
93
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
94
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
95
- https://imagen.research.google/video/paper.pdf)
96
- timestep_spacing (`str`, default `"linspace"`):
97
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
98
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
99
- steps_offset (`int`, default `0`):
100
- an offset added to the inference steps. You can use a combination of `offset=1` and
101
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
102
- stable diffusion.
89
+ trained_betas (`np.ndarray`, *optional*):
90
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
91
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
92
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
93
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
94
+ Video](https://imagen.research.google/video/paper.pdf) paper).
95
+ timestep_spacing (`str`, defaults to `"linspace"`):
96
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
97
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
98
+ steps_offset (`int`, defaults to 0):
99
+ An offset added to the inference steps. You can use a combination of `offset=1` and
100
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
101
+ Diffusion.
103
102
  """
104
103
 
105
104
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -171,12 +170,18 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
171
170
  timestep: Union[float, torch.FloatTensor],
172
171
  ) -> torch.FloatTensor:
173
172
  """
174
- Args:
175
173
  Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
176
174
  current timestep.
177
- sample (`torch.FloatTensor`): input sample timestep (`int`, optional): current timestep
175
+
176
+ Args:
177
+ sample (`torch.FloatTensor`):
178
+ The input sample.
179
+ timestep (`float` or `torch.FloatTensor`):
180
+ The current timestep in the diffusion chain.
181
+
178
182
  Returns:
179
- `torch.FloatTensor`: scaled input sample
183
+ `torch.FloatTensor`:
184
+ A scaled input sample.
180
185
  """
181
186
  step_index = self.index_for_timestep(timestep)
182
187
 
@@ -195,13 +200,13 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
195
200
  num_train_timesteps: Optional[int] = None,
196
201
  ):
197
202
  """
198
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
203
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
199
204
 
200
205
  Args:
201
206
  num_inference_steps (`int`):
202
- the number of diffusion steps used when generating samples with a pre-trained model.
203
- device (`str` or `torch.device`, optional):
204
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
207
+ The number of diffusion steps used when generating samples with a pre-trained model.
208
+ device (`str` or `torch.device`, *optional*):
209
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
205
210
  """
206
211
  self.num_inference_steps = num_inference_steps
207
212
 
@@ -295,17 +300,23 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
295
300
  return_dict: bool = True,
296
301
  ) -> Union[SchedulerOutput, Tuple]:
297
302
  """
298
- Args:
299
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
303
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
300
304
  process from the learned model outputs (most often the predicted noise).
301
- model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. timestep
302
- (`int`): current discrete timestep in the diffusion chain. sample (`torch.FloatTensor` or `np.ndarray`):
303
- current instance of sample being created by diffusion process.
304
- return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
305
+
306
+ Args:
307
+ model_output (`torch.FloatTensor`):
308
+ The direct output from learned diffusion model.
309
+ timestep (`float`):
310
+ The current discrete timestep in the diffusion chain.
311
+ sample (`torch.FloatTensor`):
312
+ A current instance of a sample created by the diffusion process.
313
+ return_dict (`bool`):
314
+ Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
315
+
305
316
  Returns:
306
317
  [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
307
- [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When
308
- returning a tuple, the first element is the sample tensor.
318
+ If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
319
+ tuple is returned where the first element is the sample tensor.
309
320
  """
310
321
  step_index = self.index_for_timestep(timestep)
311
322
 
@@ -47,34 +47,32 @@ class KarrasVeOutput(BaseOutput):
47
47
 
48
48
  class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
49
49
  """
50
- Stochastic sampling from Karras et al. [1] tailored to the Variance-Expanding (VE) models [2]. Use Algorithm 2 and
51
- the VE column of Table 1 from [1] for reference.
50
+ A stochastic scheduler tailored to variance-expanding models.
52
51
 
53
- [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models."
54
- https://arxiv.org/abs/2206.00364 [2] Song, Yang, et al. "Score-based generative modeling through stochastic
55
- differential equations." https://arxiv.org/abs/2011.13456
52
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
53
+ methods the library implements for all schedulers such as loading and saving.
56
54
 
57
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
58
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
59
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
60
- [`~SchedulerMixin.from_pretrained`] functions.
55
+ <Tip>
61
56
 
62
- For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of
63
- Diffusion-Based Generative Models." https://arxiv.org/abs/2206.00364. The grid search values used to find the
64
- optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper.
57
+ For more details on the parameters, see [Appendix E](https://arxiv.org/abs/2206.00364). The grid search values used
58
+ to find the optimal `{s_noise, s_churn, s_min, s_max}` for a specific model are described in Table 5 of the paper.
65
59
 
66
- Args:
67
- sigma_min (`float`): minimum noise magnitude
68
- sigma_max (`float`): maximum noise magnitude
69
- s_noise (`float`): the amount of additional noise to counteract loss of detail during sampling.
70
- A reasonable range is [1.000, 1.011].
71
- s_churn (`float`): the parameter controlling the overall amount of stochasticity.
72
- A reasonable range is [0, 100].
73
- s_min (`float`): the start value of the sigma range where we add noise (enable stochasticity).
74
- A reasonable range is [0, 10].
75
- s_max (`float`): the end value of the sigma range where we add noise.
76
- A reasonable range is [0.2, 80].
60
+ </Tip>
77
61
 
62
+ Args:
63
+ sigma_min (`float`, defaults to 0.02):
64
+ The minimum noise magnitude.
65
+ sigma_max (`float`, defaults to 100):
66
+ The maximum noise magnitude.
67
+ s_noise (`float`, defaults to 1.007):
68
+ The amount of additional noise to counteract loss of detail during sampling. A reasonable range is [1.000,
69
+ 1.011].
70
+ s_churn (`float`, defaults to 80):
71
+ The parameter controlling the overall amount of stochasticity. A reasonable range is [0, 100].
72
+ s_min (`float`, defaults to 0.05):
73
+ The start value of the sigma range to add noise (enable stochasticity). A reasonable range is [0, 10].
74
+ s_max (`float`, defaults to 50):
75
+ The end value of the sigma range to add noise. A reasonable range is [0.2, 80].
78
76
  """
79
77
 
80
78
  order = 2
@@ -103,22 +101,26 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
103
101
  current timestep.
104
102
 
105
103
  Args:
106
- sample (`torch.FloatTensor`): input sample
107
- timestep (`int`, optional): current timestep
104
+ sample (`torch.FloatTensor`):
105
+ The input sample.
106
+ timestep (`int`, *optional*):
107
+ The current timestep in the diffusion chain.
108
108
 
109
109
  Returns:
110
- `torch.FloatTensor`: scaled input sample
110
+ `torch.FloatTensor`:
111
+ A scaled input sample.
111
112
  """
112
113
  return sample
113
114
 
114
115
  def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
115
116
  """
116
- Sets the continuous timesteps used for the diffusion chain. Supporting function to be run before inference.
117
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
117
118
 
118
119
  Args:
119
120
  num_inference_steps (`int`):
120
- the number of diffusion steps used when generating samples with a pre-trained model.
121
-
121
+ The number of diffusion steps used when generating samples with a pre-trained model.
122
+ device (`str` or `torch.device`, *optional*):
123
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
122
124
  """
123
125
  self.num_inference_steps = num_inference_steps
124
126
  timesteps = np.arange(0, self.num_inference_steps)[::-1].copy()
@@ -136,10 +138,15 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
136
138
  self, sample: torch.FloatTensor, sigma: float, generator: Optional[torch.Generator] = None
137
139
  ) -> Tuple[torch.FloatTensor, float]:
138
140
  """
139
- Explicit Langevin-like "churn" step of adding noise to the sample according to a factor gamma_i ≥ 0 to reach a
140
- higher noise level sigma_hat = sigma_i + gamma_i*sigma_i.
141
+ Explicit Langevin-like "churn" step of adding noise to the sample according to a `gamma_i ≥ 0` to reach a
142
+ higher noise level `sigma_hat = sigma_i + gamma_i*sigma_i`.
141
143
 
142
- TODO Args:
144
+ Args:
145
+ sample (`torch.FloatTensor`):
146
+ The input sample.
147
+ sigma (`float`):
148
+ generator (`torch.Generator`, *optional*):
149
+ A random number generator.
143
150
  """
144
151
  if self.config.s_min <= sigma <= self.config.s_max:
145
152
  gamma = min(self.config.s_churn / self.num_inference_steps, 2**0.5 - 1)
@@ -162,21 +169,22 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
162
169
  return_dict: bool = True,
163
170
  ) -> Union[KarrasVeOutput, Tuple]:
164
171
  """
165
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
172
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
166
173
  process from the learned model outputs (most often the predicted noise).
167
174
 
168
175
  Args:
169
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
170
- sigma_hat (`float`): TODO
171
- sigma_prev (`float`): TODO
172
- sample_hat (`torch.FloatTensor`): TODO
173
- return_dict (`bool`): option for returning tuple rather than KarrasVeOutput class
176
+ model_output (`torch.FloatTensor`):
177
+ The direct output from learned diffusion model.
178
+ sigma_hat (`float`):
179
+ sigma_prev (`float`):
180
+ sample_hat (`torch.FloatTensor`):
181
+ return_dict (`bool`, *optional*, defaults to `True`):
182
+ Whether or not to return a [`~schedulers.scheduling_karras_ve.KarrasVeOutput`] or `tuple`.
174
183
 
175
- KarrasVeOutput: updated sample in the diffusion chain and derivative (TODO double check).
176
184
  Returns:
177
- [`~schedulers.scheduling_karras_ve.KarrasVeOutput`] or `tuple`:
178
- [`~schedulers.scheduling_karras_ve.KarrasVeOutput`] if `return_dict` is True, otherwise a `tuple`. When
179
- returning a tuple, the first element is the sample tensor.
185
+ [`~schedulers.scheduling_karras_ve.KarrasVeOutput`] or `tuple`:
186
+ If return_dict is `True`, [`~schedulers.scheduling_karras_ve.KarrasVeOutput`] is returned,
187
+ otherwise a tuple is returned where the first element is the sample tensor.
180
188
 
181
189
  """
182
190
 
@@ -202,16 +210,18 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
202
210
  return_dict: bool = True,
203
211
  ) -> Union[KarrasVeOutput, Tuple]:
204
212
  """
205
- Correct the predicted sample based on the output model_output of the network. TODO complete description
213
+ Corrects the predicted sample based on the `model_output` of the network.
206
214
 
207
215
  Args:
208
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
216
+ model_output (`torch.FloatTensor`):
217
+ The direct output from learned diffusion model.
209
218
  sigma_hat (`float`): TODO
210
219
  sigma_prev (`float`): TODO
211
220
  sample_hat (`torch.FloatTensor`): TODO
212
221
  sample_prev (`torch.FloatTensor`): TODO
213
222
  derivative (`torch.FloatTensor`): TODO
214
- return_dict (`bool`): option for returning tuple rather than KarrasVeOutput class
223
+ return_dict (`bool`, *optional*, defaults to `True`):
224
+ Whether or not to return a [`~schedulers.scheduling_karras_ve.KarrasVeOutput`] or `tuple`.
215
225
 
216
226
  Returns:
217
227
  prev_sample (TODO): updated sample in the diffusion chain. derivative (TODO): TODO
@@ -29,14 +29,14 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
29
29
  # Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->LMSDiscrete
30
30
  class LMSDiscreteSchedulerOutput(BaseOutput):
31
31
  """
32
- Output class for the scheduler's step function output.
32
+ Output class for the scheduler's `step` function output.
33
33
 
34
34
  Args:
35
35
  prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
36
- Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
36
+ Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
37
37
  denoising loop.
38
38
  pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
39
- The predicted denoised sample (x_{0}) based on the model output from the current timestep.
39
+ The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
40
40
  `pred_original_sample` can be used to preview progress or for guidance.
41
41
  """
42
42
 
@@ -91,39 +91,37 @@ def betas_for_alpha_bar(
91
91
 
92
92
  class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
93
93
  """
94
- Linear Multistep Scheduler for discrete beta schedules. Based on the original k-diffusion implementation by
95
- Katherine Crowson:
96
- https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181
94
+ A linear multistep scheduler for discrete beta schedules.
97
95
 
98
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
99
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
100
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
101
- [`~SchedulerMixin.from_pretrained`] functions.
96
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
97
+ methods the library implements for all schedulers such as loading and saving.
102
98
 
103
99
  Args:
104
- num_train_timesteps (`int`): number of diffusion steps used to train the model.
105
- beta_start (`float`): the starting `beta` value of inference.
106
- beta_end (`float`): the final `beta` value.
107
- beta_schedule (`str`):
108
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
100
+ num_train_timesteps (`int`, defaults to 1000):
101
+ The number of diffusion steps to train the model.
102
+ beta_start (`float`, defaults to 0.0001):
103
+ The starting `beta` value of inference.
104
+ beta_end (`float`, defaults to 0.02):
105
+ The final `beta` value.
106
+ beta_schedule (`str`, defaults to `"linear"`):
107
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
109
108
  `linear` or `scaled_linear`.
110
- trained_betas (`np.ndarray`, optional):
111
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
109
+ trained_betas (`np.ndarray`, *optional*):
110
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
112
111
  use_karras_sigmas (`bool`, *optional*, defaults to `False`):
113
- This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
114
- noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
115
- of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
116
- prediction_type (`str`, default `epsilon`, optional):
117
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
118
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
119
- https://imagen.research.google/video/paper.pdf)
120
- timestep_spacing (`str`, default `"linspace"`):
121
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
122
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
123
- steps_offset (`int`, default `0`):
124
- an offset added to the inference steps. You can use a combination of `offset=1` and
125
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
126
- stable diffusion.
112
+ Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
113
+ the sigmas are determined according to a sequence of noise levels {σi}.
114
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
115
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
116
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
117
+ Video](https://imagen.research.google/video/paper.pdf) paper).
118
+ timestep_spacing (`str`, defaults to `"linspace"`):
119
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
120
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
121
+ steps_offset (`int`, defaults to 0):
122
+ An offset added to the inference steps. You can use a combination of `offset=1` and
123
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
124
+ Diffusion.
127
125
  """
128
126
 
129
127
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -183,14 +181,18 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
183
181
  self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
184
182
  ) -> torch.FloatTensor:
185
183
  """
186
- Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the K-LMS algorithm.
184
+ Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
185
+ current timestep.
187
186
 
188
187
  Args:
189
- sample (`torch.FloatTensor`): input sample
190
- timestep (`float` or `torch.FloatTensor`): the current timestep in the diffusion chain
188
+ sample (`torch.FloatTensor`):
189
+ The input sample.
190
+ timestep (`float` or `torch.FloatTensor`):
191
+ The current timestep in the diffusion chain.
191
192
 
192
193
  Returns:
193
- `torch.FloatTensor`: scaled input sample
194
+ `torch.FloatTensor`:
195
+ A scaled input sample.
194
196
  """
195
197
  if isinstance(timestep, torch.Tensor):
196
198
  timestep = timestep.to(self.timesteps.device)
@@ -202,12 +204,12 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
202
204
 
203
205
  def get_lms_coefficient(self, order, t, current_order):
204
206
  """
205
- Compute a linear multistep coefficient.
207
+ Compute the linear multistep coefficient.
206
208
 
207
209
  Args:
208
- order (TODO):
209
- t (TODO):
210
- current_order (TODO):
210
+ order ():
211
+ t ():
212
+ current_order ():
211
213
  """
212
214
 
213
215
  def lms_derivative(tau):
@@ -224,13 +226,13 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
224
226
 
225
227
  def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
226
228
  """
227
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
229
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
228
230
 
229
231
  Args:
230
232
  num_inference_steps (`int`):
231
- the number of diffusion steps used when generating samples with a pre-trained model.
232
- device (`str` or `torch.device`, optional):
233
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
233
+ The number of diffusion steps used when generating samples with a pre-trained model.
234
+ device (`str` or `torch.device`, *optional*):
235
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
234
236
  """
235
237
  self.num_inference_steps = num_inference_steps
236
238
 
@@ -322,21 +324,25 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
322
324
  return_dict: bool = True,
323
325
  ) -> Union[LMSDiscreteSchedulerOutput, Tuple]:
324
326
  """
325
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
327
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
326
328
  process from the learned model outputs (most often the predicted noise).
327
329
 
328
330
  Args:
329
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
330
- timestep (`float`): current timestep in the diffusion chain.
331
+ model_output (`torch.FloatTensor`):
332
+ The direct output from learned diffusion model.
333
+ timestep (`float` or `torch.FloatTensor`):
334
+ The current discrete timestep in the diffusion chain.
331
335
  sample (`torch.FloatTensor`):
332
- current instance of sample being created by diffusion process.
333
- order: coefficient for multi-step inference.
334
- return_dict (`bool`): option for returning tuple rather than LMSDiscreteSchedulerOutput class
336
+ A current instance of a sample created by the diffusion process.
337
+ order (`int`, defaults to 4):
338
+ The order of the linear multistep method.
339
+ return_dict (`bool`, *optional*, defaults to `True`):
340
+ Whether or not to return a [`~schedulers.scheduling_lms_discrete.LMSDiscreteSchedulerOutput`] or tuple.
335
341
 
336
342
  Returns:
337
- [`~schedulers.scheduling_utils.LMSDiscreteSchedulerOutput`] or `tuple`:
338
- [`~schedulers.scheduling_utils.LMSDiscreteSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`.
339
- When returning a tuple, the first element is the sample tensor.
343
+ [`~schedulers.scheduling_lms_discrete.LMSDiscreteSchedulerOutput`] or `tuple`:
344
+ If return_dict is `True`, [`~schedulers.scheduling_lms_discrete.LMSDiscreteSchedulerOutput`] is returned, otherwise a
345
+ tuple is returned where the first element is the sample tensor.
340
346
 
341
347
  """
342
348
  if not self.is_scale_input_called: