diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Files changed (114)
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
--- a/diffusers/schedulers/scheduling_pndm.py
+++ b/diffusers/schedulers/scheduling_pndm.py
@@ -71,42 +71,42 @@ def betas_for_alpha_bar(
 
 class PNDMScheduler(SchedulerMixin, ConfigMixin):
     """
-    Pseudo numerical methods for diffusion models (PNDM) proposes using more advanced ODE integration techniques,
-    namely Runge-Kutta method and a linear multi-step method.
+    `PNDMScheduler` uses pseudo numerical methods for diffusion models such as the Runge-Kutta and linear multi-step
+    method.
 
-    [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
-    function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
-
-    For more details, see the original paper: https://arxiv.org/abs/2202.09778
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
 
     Args:
-        num_train_timesteps (`int`): number of diffusion steps used to train the model.
-        beta_start (`float`): the starting `beta` value of inference.
-        beta_end (`float`): the final `beta` value.
-        beta_schedule (`str`):
-            the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        beta_start (`float`, defaults to 0.0001):
+            The starting `beta` value of inference.
+        beta_end (`float`, defaults to 0.02):
+            The final `beta` value.
+        beta_schedule (`str`, defaults to `"linear"`):
+            The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
             `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
-        trained_betas (`np.ndarray`, optional):
-            option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
-        skip_prk_steps (`bool`):
-            allows the scheduler to skip the Runge-Kutta steps that are defined in the original paper as being required
-            before plms steps; defaults to `False`.
-        set_alpha_to_one (`bool`, default `False`):
-            each diffusion step uses the value of alphas product at that step and at the previous one. For the final
-            step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
-            otherwise it uses the value of alpha at step 0.
-        prediction_type (`str`, default `epsilon`, optional):
-            prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion process)
-            or `v_prediction` (see section 2.4 https://imagen.research.google/video/paper.pdf)
-        timestep_spacing (`str`, default `"leading"`):
-            The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
-            Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
-        steps_offset (`int`, default `0`):
-            an offset added to the inference steps. You can use a combination of `offset=1` and
-            `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
-            stable diffusion.
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
+        skip_prk_steps (`bool`, defaults to `False`):
+            Allows the scheduler to skip the Runge-Kutta steps defined in the original paper as being required before
+            PLMS steps.
+        set_alpha_to_one (`bool`, defaults to `False`):
+            Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
+            there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
+            otherwise it uses the alpha value at step 0.
+        prediction_type (`str`, defaults to `epsilon`, *optional*):
+            Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process)
+            or `v_prediction` (see section 2.4 of [Imagen Video](https://imagen.research.google/video/paper.pdf)
+            paper).
+        timestep_spacing (`str`, defaults to `"leading"`):
+            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+        steps_offset (`int`, defaults to 0):
+            An offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
+            Diffusion.
     """
 
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
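For orientation, a minimal sketch (not part of the diff) of how the documented defaults surface on a `PNDMScheduler` instance via `ConfigMixin`; the keyword values simply mirror the defaults listed in the rewritten docstring above.

```python
from diffusers import PNDMScheduler

# Instantiate with the defaults documented above; ConfigMixin records every
# __init__ argument on `scheduler.config`.
scheduler = PNDMScheduler(
    num_train_timesteps=1000,
    beta_start=0.0001,
    beta_end=0.02,
    beta_schedule="linear",
    skip_prk_steps=False,
    set_alpha_to_one=False,
    prediction_type="epsilon",
    timestep_spacing="leading",
    steps_offset=0,
)

print(scheduler.config.num_train_timesteps)  # 1000
print(scheduler.config.beta_schedule)        # linear
```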
@@ -169,11 +169,13 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
 
     def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
         """
-        Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 
         Args:
             num_inference_steps (`int`):
-                the number of diffusion steps used when generating samples with a pre-trained model.
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
 
         self.num_inference_steps = num_inference_steps
@@ -233,22 +235,24 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
         """
-        Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
-        process from the learned model outputs (most often the predicted noise).
-
-        This function calls `step_prk()` or `step_plms()` depending on the internal variable `counter`.
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+        process from the learned model outputs (most often the predicted noise), and calls [`~PNDMScheduler.step_prk`]
+        or [`~PNDMScheduler.step_plms`] depending on the internal variable `counter`.
 
         Args:
-            model_output (`torch.FloatTensor`): direct output from learned diffusion model.
-            timestep (`int`): current discrete timestep in the diffusion chain.
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`int`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
-            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+                A current instance of a sample created by the diffusion process.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.
 
         Returns:
             [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
-                [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When
-                returning a tuple, the first element is the sample tensor.
+                If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
+                tuple is returned where the first element is the sample tensor.
 
         """
         if self.counter < len(self.prk_timesteps) and not self.config.skip_prk_steps:
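A hedged sketch of the call pattern these docstrings describe: `set_timesteps()` before inference, then `step()` once per timestep. The random tensor stands in for a trained UNet's predicted noise; this illustrates the scheduler API only, not a working denoiser.

```python
import torch

from diffusers import PNDMScheduler

scheduler = PNDMScheduler(skip_prk_steps=True)  # PLMS only, as used for Stable Diffusion
scheduler.set_timesteps(num_inference_steps=50, device="cpu")

sample = torch.randn(1, 4, 64, 64)  # start from Gaussian noise
for t in scheduler.timesteps:
    noise_pred = torch.randn_like(sample)  # stand-in for a UNet's predicted noise
    # step() dispatches to step_prk()/step_plms() based on the internal counter
    sample = scheduler.step(noise_pred, t, sample).prev_sample
```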
@@ -264,19 +268,24 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
         """
-        Step function propagating the sample with the Runge-Kutta method. RK takes 4 forward passes to approximate the
-        solution to the differential equation.
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
+        the Runge-Kutta method. It performs four forward passes to approximate the solution to the differential
+        equation.
 
         Args:
-            model_output (`torch.FloatTensor`): direct output from learned diffusion model.
-            timestep (`int`): current discrete timestep in the diffusion chain.
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`int`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
-            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+                A current instance of a sample created by the diffusion process.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
 
         Returns:
-            [`~scheduling_utils.SchedulerOutput`] or `tuple`: [`~scheduling_utils.SchedulerOutput`] if `return_dict` is
-            True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
+            [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
+                tuple is returned where the first element is the sample tensor.
 
         """
         if self.num_inference_steps is None:
@@ -319,19 +328,23 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
         """
-        Step function propagating the sample with the linear multi-step method. This has one forward pass with multiple
-        times to approximate the solution.
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
+        the linear multistep method. It performs one forward pass multiple times to approximate the solution.
 
         Args:
-            model_output (`torch.FloatTensor`): direct output from learned diffusion model.
-            timestep (`int`): current discrete timestep in the diffusion chain.
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`int`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
-            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+                A current instance of a sample created by the diffusion process.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
 
         Returns:
-            [`~scheduling_utils.SchedulerOutput`] or `tuple`: [`~scheduling_utils.SchedulerOutput`] if `return_dict` is
-            True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
+            [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
+                tuple is returned where the first element is the sample tensor.
 
         """
         if self.num_inference_steps is None:
@@ -384,10 +397,12 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
         current timestep.
 
         Args:
-            sample (`torch.FloatTensor`): input sample
+            sample (`torch.FloatTensor`):
+                The input sample.
 
         Returns:
-            `torch.FloatTensor`: scaled input sample
+            `torch.FloatTensor`:
+                A scaled input sample.
         """
         return sample
 
--- a/diffusers/schedulers/scheduling_repaint.py
+++ b/diffusers/schedulers/scheduling_repaint.py
@@ -89,32 +89,28 @@ def betas_for_alpha_bar(
 
 class RePaintScheduler(SchedulerMixin, ConfigMixin):
     """
-    RePaint is a schedule for DDPM inpainting inside a given mask.
+    `RePaintScheduler` is a scheduler for DDPM inpainting inside a given mask.
 
-    [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
-    function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
-
-    For more details, see the original paper: https://arxiv.org/pdf/2201.09865.pdf
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
 
     Args:
-        num_train_timesteps (`int`): number of diffusion steps used to train the model.
-        beta_start (`float`): the starting `beta` value of inference.
-        beta_end (`float`): the final `beta` value.
-        beta_schedule (`str`):
-            the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
-            `linear`, `scaled_linear`, `squaredcos_cap_v2` or `sigmoid`.
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        beta_start (`float`, defaults to 0.0001):
+            The starting `beta` value of inference.
+        beta_end (`float`, defaults to 0.02):
+            The final `beta` value.
+        beta_schedule (`str`, defaults to `"linear"`):
+            The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+            `linear`, `scaled_linear`, `squaredcos_cap_v2`, or `sigmoid`.
         eta (`float`):
-            The weight of noise for added noise in a diffusion step. Its value is between 0.0 and 1.0 -0.0 is DDIM and
-            1.0 is DDPM scheduler respectively.
-        trained_betas (`np.ndarray`, optional):
-            option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
-        variance_type (`str`):
-            options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`,
-            `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
-        clip_sample (`bool`, default `True`):
-            option to clip predicted sample between -1 and 1 for numerical stability.
+            The weight of noise for added noise in diffusion step. If its value is between 0.0 and 1.0 it corresponds
+            to the DDIM scheduler, and if its value is between -0.0 and 1.0 it corresponds to the DDPM scheduler.
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
+        clip_sample (`bool`, defaults to `True`):
+            Clip the predicted sample between -1 and 1 for numerical stability.
 
     """
 
@@ -171,11 +167,14 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):
         current timestep.
 
         Args:
-            sample (`torch.FloatTensor`): input sample
-            timestep (`int`, optional): current timestep
+            sample (`torch.FloatTensor`):
+                The input sample.
+            timestep (`int`, *optional*):
+                The current timestep in the diffusion chain.
 
         Returns:
-            `torch.FloatTensor`: scaled input sample
+            `torch.FloatTensor`:
+                A scaled input sample.
         """
         return sample
 
@@ -186,6 +185,23 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):
         jump_n_sample: int = 10,
         device: Union[str, torch.device] = None,
     ):
+        """
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
+
+        Args:
+            num_inference_steps (`int`):
+                The number of diffusion steps used when generating samples with a pre-trained model. If used,
+                `timesteps` must be `None`.
+            jump_length (`int`, defaults to 10):
+                The number of steps taken forward in time before going backward in time for a single jump (“j” in
+                RePaint paper). Take a look at Figure 9 and 10 in the paper.
+            jump_n_sample (`int`, defaults to 10):
+                The number of times to make a forward time jump for a given chosen time sample. Take a look at Figure 9
+                and 10 in the paper.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
+
+        """
         num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps)
         self.num_inference_steps = num_inference_steps
 
@@ -239,27 +255,29 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[RePaintSchedulerOutput, Tuple]:
         """
-        Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
         process from the learned model outputs (most often the predicted noise).
 
         Args:
-            model_output (`torch.FloatTensor`): direct output from learned
-                diffusion model.
-            timestep (`int`): current discrete timestep in the diffusion chain.
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`int`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
+                A current instance of a sample created by the diffusion process.
             original_image (`torch.FloatTensor`):
-                the original image to inpaint on.
+                The original image to inpaint on.
             mask (`torch.FloatTensor`):
-                the mask where 0.0 values define which part of the original image to inpaint (change).
-            generator (`torch.Generator`, *optional*): random number generator.
-            return_dict (`bool`): option for returning tuple rather than
-                DDPMSchedulerOutput class
+                The mask where a value of 0.0 indicates which part of the original image to inpaint.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] or `tuple`.
 
         Returns:
-            [`~schedulers.scheduling_utils.RePaintSchedulerOutput`] or `tuple`:
-            [`~schedulers.scheduling_utils.RePaintSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When
-            returning a tuple, the first element is the sample tensor.
+            [`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] is returned,
+                otherwise a tuple is returned where the first element is the sample tensor.
 
         """
         t = timestep
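Putting the RePaint docstrings above together, roughly the loop `RePaintPipeline` runs: forward "jumps" in time are re-noised with `undo_step` (not shown in this diff), everything else goes through `step` with the original image and mask. The random tensors are stand-ins for real data and a trained UNet, so treat this as an API sketch only.

```python
import torch

from diffusers import RePaintScheduler

scheduler = RePaintScheduler()
scheduler.set_timesteps(num_inference_steps=50, jump_length=10, jump_n_sample=10, device="cpu")

original_image = torch.randn(1, 3, 64, 64)  # stand-in for the image to inpaint
mask = torch.ones_like(original_image)      # 0.0 marks the regions to inpaint
sample = torch.randn_like(original_image)
generator = torch.Generator().manual_seed(0)

t_last = scheduler.timesteps[0] + 1
for t in scheduler.timesteps:
    if t < t_last:
        noise_pred = torch.randn_like(sample)  # stand-in for a UNet's predicted noise
        sample = scheduler.step(noise_pred, t, sample, original_image, mask, generator).prev_sample
    else:
        # forward jump in time: re-noise the sample before denoising again
        sample = scheduler.undo_step(sample, t_last, generator)
    t_last = t
```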
--- a/diffusers/schedulers/scheduling_sde_ve.py
+++ b/diffusers/schedulers/scheduling_sde_ve.py
@@ -28,14 +28,14 @@ from .scheduling_utils import SchedulerMixin, SchedulerOutput
 @dataclass
 class SdeVeOutput(BaseOutput):
     """
-    Output class for the ScoreSdeVeScheduler's step function output.
+    Output class for the scheduler's `step` function output.
 
     Args:
         prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
-            Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
         prev_sample_mean (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
-            Mean averaged `prev_sample`. Same as `prev_sample`, only mean-averaged over previous timesteps.
+            Mean averaged `prev_sample` over previous timesteps.
     """
 
     prev_sample: torch.FloatTensor
@@ -44,26 +44,25 @@ class SdeVeOutput(BaseOutput):
 
 class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
     """
-    The variance exploding stochastic differential equation (SDE) scheduler.
+    `ScoreSdeVeScheduler` is a variance exploding stochastic differential equation (SDE) scheduler.
 
-    For more information, see the original paper: https://arxiv.org/abs/2011.13456
-
-    [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
-    function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
 
     Args:
-        num_train_timesteps (`int`): number of diffusion steps used to train the model.
-        snr (`float`):
-            coefficient weighting the step from the model_output sample (from the network) to the random noise.
-        sigma_min (`float`):
-            initial noise scale for sigma sequence in sampling procedure. The minimum sigma should mirror the
-            distribution of the data.
-        sigma_max (`float`): maximum value used for the range of continuous timesteps passed into the model.
-        sampling_eps (`float`): the end value of sampling, where timesteps decrease progressively from 1 to
-            epsilon.
-        correct_steps (`int`): number of correction steps performed on a produced sample.
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        snr (`float`, defaults to 0.15):
+            A coefficient weighting the step from the `model_output` sample (from the network) to the random noise.
+        sigma_min (`float`, defaults to 0.01):
+            The initial noise scale for the sigma sequence in the sampling procedure. The minimum sigma should mirror
+            the distribution of the data.
+        sigma_max (`float`, defaults to 1348.0):
+            The maximum value used for the range of continuous timesteps passed into the model.
+        sampling_eps (`float`, defaults to 1e-5):
+            The end value of sampling where timesteps decrease progressively from 1 to epsilon.
+        correct_steps (`int`, defaults to 1):
+            The number of correction steps performed on a produced sample.
     """
 
     order = 1
@@ -92,11 +91,14 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
         current timestep.
 
         Args:
-            sample (`torch.FloatTensor`): input sample
-            timestep (`int`, optional): current timestep
+            sample (`torch.FloatTensor`):
+                The input sample.
+            timestep (`int`, *optional*):
+                The current timestep in the diffusion chain.
 
         Returns:
-            `torch.FloatTensor`: scaled input sample
+            `torch.FloatTensor`:
+                A scaled input sample.
         """
         return sample
 
@@ -104,13 +106,15 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
         self, num_inference_steps: int, sampling_eps: float = None, device: Union[str, torch.device] = None
     ):
         """
-        Sets the continuous timesteps used for the diffusion chain. Supporting function to be run before inference.
+        Sets the continuous timesteps used for the diffusion chain (to be run before inference).
 
         Args:
             num_inference_steps (`int`):
-                the number of diffusion steps used when generating samples with a pre-trained model.
-            sampling_eps (`float`, optional):
-                final timestep value (overrides value given at Scheduler instantiation).
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            sampling_eps (`float`, *optional*):
+                The final timestep value (overrides value given during scheduler instantiation).
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
 
         """
         sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps
@@ -121,19 +125,18 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
         self, num_inference_steps: int, sigma_min: float = None, sigma_max: float = None, sampling_eps: float = None
     ):
         """
-        Sets the noise scales used for the diffusion chain. Supporting function to be run before inference.
-
-        The sigmas control the weight of the `drift` and `diffusion` components of sample update.
+        Sets the noise scales used for the diffusion chain (to be run before inference). The sigmas control the weight
+        of the `drift` and `diffusion` components of the sample update.
 
         Args:
             num_inference_steps (`int`):
-                the number of diffusion steps used when generating samples with a pre-trained model.
+                The number of diffusion steps used when generating samples with a pre-trained model.
             sigma_min (`float`, optional):
-                initial noise scale value (overrides value given at Scheduler instantiation).
+                The initial noise scale value (overrides value given during scheduler instantiation).
             sigma_max (`float`, optional):
-                final noise scale value (overrides value given at Scheduler instantiation).
+                The final noise scale value (overrides value given during scheduler instantiation).
             sampling_eps (`float`, optional):
-                final timestep value (overrides value given at Scheduler instantiation).
+                The final timestep value (overrides value given during scheduler instantiation).
 
         """
         sigma_min = sigma_min if sigma_min is not None else self.config.sigma_min
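A small sketch of the two setup calls documented above; `set_sigmas` derives its sigma sequence from the timesteps, so `set_timesteps` has to be called first. The override values here are arbitrary illustrations, not recommended settings.

```python
from diffusers import ScoreSdeVeScheduler

scheduler = ScoreSdeVeScheduler()
scheduler.set_timesteps(num_inference_steps=100)
# sigma_min/sigma_max override the config values for this run only
scheduler.set_sigmas(num_inference_steps=100, sigma_min=0.01, sigma_max=100.0)

print(scheduler.timesteps.shape)  # torch.Size([100])
print(scheduler.sigmas.shape)     # torch.Size([100])
```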
@@ -162,20 +165,25 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SdeVeOutput, Tuple]:
         """
-        Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
         process from the learned model outputs (most often the predicted noise).
 
         Args:
-            model_output (`torch.FloatTensor`): direct output from learned diffusion model.
-            timestep (`int`): current discrete timestep in the diffusion chain.
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`int`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
-            generator: random number generator.
-            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+                A current instance of a sample created by the diffusion process.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`.
 
         Returns:
-            [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`: [`~schedulers.scheduling_sde_ve.SdeVeOutput`] if
-            `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
+            [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_sde_ve.SdeVeOutput`] is returned, otherwise a tuple
+                is returned where the first element is the sample tensor.
 
         """
         if self.timesteps is None:
@@ -224,19 +232,23 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
         """
-        Correct the predicted sample based on the output model_output of the network. This is often run repeatedly
-        after making the prediction for the previous timestep.
+        Correct the predicted sample based on the `model_output` of the network. This is often run repeatedly after
+        making the prediction for the previous timestep.
 
         Args:
-            model_output (`torch.FloatTensor`): direct output from learned diffusion model.
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
-            generator: random number generator.
-            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+                A current instance of a sample created by the diffusion process.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`.
 
         Returns:
-            [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`: [`~schedulers.scheduling_sde_ve.SdeVeOutput`] if
-            `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
+            [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_sde_ve.SdeVeOutput`] is returned, otherwise a tuple
+                is returned where the first element is the sample tensor.
 
         """
         if self.timesteps is None:
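Taken together, `step_pred` and `step_correct` form the predictor-corrector sampler these docstrings describe. A hedged sketch of that loop, with a random tensor standing in for the score network's output:

```python
import torch

from diffusers import ScoreSdeVeScheduler

scheduler = ScoreSdeVeScheduler()
scheduler.set_timesteps(num_inference_steps=100)
scheduler.set_sigmas(num_inference_steps=100)

sample = torch.randn(1, 3, 64, 64) * scheduler.config.sigma_max
generator = torch.Generator().manual_seed(0)

for t in scheduler.timesteps:
    # corrector: refine the current sample `correct_steps` times
    for _ in range(scheduler.config.correct_steps):
        score = torch.randn_like(sample)  # stand-in for the score model output
        sample = scheduler.step_correct(score, sample, generator=generator).prev_sample

    # predictor: reverse-SDE step towards the previous timestep
    score = torch.randn_like(sample)
    out = scheduler.step_pred(score, t, sample, generator=generator)
    sample, sample_mean = out.prev_sample, out.prev_sample_mean
```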
--- a/diffusers/schedulers/scheduling_sde_vp.py
+++ b/diffusers/schedulers/scheduling_sde_vp.py
@@ -26,17 +26,18 @@ from .scheduling_utils import SchedulerMixin
 
 class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
     """
-    The variance preserving stochastic differential equation (SDE) scheduler.
-
-    [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
-    function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
-
-    For more information, see the original paper: https://arxiv.org/abs/2011.13456
-
-    UNDER CONSTRUCTION
-
+    `ScoreSdeVpScheduler` is a variance preserving stochastic differential equation (SDE) scheduler.
+
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
+
+    Args:
+        num_train_timesteps (`int`, defaults to 2000):
+            The number of diffusion steps to train the model.
+        beta_min (`int`, defaults to 0.1):
+        beta_max (`int`, defaults to 20):
+        sampling_eps (`int`, defaults to 1e-3):
+            The end value of sampling where timesteps decrease progressively from 1 to epsilon.
     """
 
     order = 1
@@ -48,9 +49,29 @@ class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
         self.timesteps = None
 
     def set_timesteps(self, num_inference_steps, device: Union[str, torch.device] = None):
+        """
+        Sets the continuous timesteps used for the diffusion chain (to be run before inference).
+
+        Args:
+            num_inference_steps (`int`):
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
+        """
         self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps, device=device)
 
     def step_pred(self, score, x, t, generator=None):
+        """
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+        process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            score ():
+            x ():
+            t ():
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+        """
         if self.timesteps is None:
             raise ValueError(
                 "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler"
--- a/diffusers/schedulers/scheduling_unclip.py
+++ b/diffusers/schedulers/scheduling_unclip.py
@@ -28,14 +28,14 @@ from .scheduling_utils import SchedulerMixin
 # Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->UnCLIP
 class UnCLIPSchedulerOutput(BaseOutput):
     """
-    Output class for the scheduler's step function output.
+    Output class for the scheduler's `step` function output.
 
     Args:
         prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
-            Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
         pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
-            The predicted denoised sample (x_{0}) based on the model output from the current timestep.
+            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
             `pred_original_sample` can be used to preview progress or for guidance.
     """