diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -75,79 +75,59 @@ def betas_for_alpha_bar(
75
75
 
76
76
  class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
77
77
  """
78
- DPM-Solver (and the improved version DPM-Solver++) is a fast dedicated high-order solver for diffusion ODEs with
79
- the convergence order guarantee. Empirically, sampling by DPM-Solver with only 20 steps can generate high-quality
80
- samples, and it can generate quite good samples even in only 10 steps.
78
+ `DPMSolverSinglestepScheduler` is a fast dedicated high-order solver for diffusion ODEs.
81
79
 
82
- For more details, see the original paper: https://arxiv.org/abs/2206.00927 and https://arxiv.org/abs/2211.01095
83
-
84
- Currently, we support the singlestep DPM-Solver for both noise prediction models and data prediction models. We
85
- recommend to use `solver_order=2` for guided sampling, and `solver_order=3` for unconditional sampling.
86
-
87
- We also support the "dynamic thresholding" method in Imagen (https://arxiv.org/abs/2205.11487). For pixel-space
88
- diffusion models, you can set both `algorithm_type="dpmsolver++"` and `thresholding=True` to use the dynamic
89
- thresholding. Note that the thresholding method is unsuitable for latent-space diffusion models (such as
90
- stable-diffusion).
91
-
92
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
93
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
94
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
95
- [`~SchedulerMixin.from_pretrained`] functions.
80
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
81
+ methods the library implements for all schedulers such as loading and saving.
96
82
 
97
83
  Args:
98
- num_train_timesteps (`int`): number of diffusion steps used to train the model.
99
- beta_start (`float`): the starting `beta` value of inference.
100
- beta_end (`float`): the final `beta` value.
101
- beta_schedule (`str`):
102
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
84
+ num_train_timesteps (`int`, defaults to 1000):
85
+ The number of diffusion steps to train the model.
86
+ beta_start (`float`, defaults to 0.0001):
87
+ The starting `beta` value of inference.
88
+ beta_end (`float`, defaults to 0.02):
89
+ The final `beta` value.
90
+ beta_schedule (`str`, defaults to `"linear"`):
91
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
103
92
  `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
104
- trained_betas (`np.ndarray`, optional):
105
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
106
- solver_order (`int`, default `2`):
107
- the order of DPM-Solver; can be `1` or `2` or `3`. We recommend to use `solver_order=2` for guided
93
+ trained_betas (`np.ndarray`, *optional*):
94
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
95
+ solver_order (`int`, defaults to 2):
96
+ The DPMSolver order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided
108
97
  sampling, and `solver_order=3` for unconditional sampling.
109
- prediction_type (`str`, default `epsilon`):
110
- indicates whether the model predicts the noise (epsilon), or the data / `x0`. One of `epsilon`, `sample`,
111
- or `v-prediction`.
112
- thresholding (`bool`, default `False`):
113
- whether to use the "dynamic thresholding" method (introduced by Imagen, https://arxiv.org/abs/2205.11487).
114
- For pixel-space diffusion models, you can set both `algorithm_type=dpmsolver++` and `thresholding=True` to
115
- use the dynamic thresholding. Note that the thresholding method is unsuitable for latent-space diffusion
116
- models (such as stable-diffusion).
117
- dynamic_thresholding_ratio (`float`, default `0.995`):
118
- the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
119
- (https://arxiv.org/abs/2205.11487).
120
- sample_max_value (`float`, default `1.0`):
121
- the threshold value for dynamic thresholding. Valid only when `thresholding=True` and
122
- `algorithm_type="dpmsolver++`.
123
- algorithm_type (`str`, default `dpmsolver++`):
124
- the algorithm type for the solver. Either `dpmsolver` or `dpmsolver++`. The `dpmsolver` type implements the
125
- algorithms in https://arxiv.org/abs/2206.00927, and the `dpmsolver++` type implements the algorithms in
126
- https://arxiv.org/abs/2211.01095. We recommend to use `dpmsolver++` with `solver_order=2` for guided
127
- sampling (e.g. stable-diffusion).
128
- solver_type (`str`, default `midpoint`):
129
- the solver type for the second-order solver. Either `midpoint` or `heun`. The solver type slightly affects
130
- the sample quality, especially for small number of steps. We empirically find that `midpoint` solvers are
131
- slightly better, so we recommend to use the `midpoint` type.
132
- lower_order_final (`bool`, default `True`):
133
- whether to use lower-order solvers in the final steps. For singlestep schedulers, we recommend to enable
134
- this to use up all the function evaluations.
98
+ prediction_type (`str`, *optional*, defaults to `epsilon`):
99
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
100
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
101
+ Video](https://imagen.research.google/video/paper.pdf) paper).
102
+ thresholding (`bool`, defaults to `False`):
103
+ Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
104
+ as Stable Diffusion.
105
+ dynamic_thresholding_ratio (`float`, defaults to 0.995):
106
+ The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
107
+ sample_max_value (`float`, defaults to 1.0):
108
+ The threshold value for dynamic thresholding. Valid only when `thresholding=True` and
109
+ `algorithm_type="dpmsolver++"`.
110
+ algorithm_type (`str`, defaults to `dpmsolver++`):
111
+ Algorithm type for the solver; can be `dpmsolver`, `dpmsolver++`, `sde-dpmsolver` or `sde-dpmsolver++`. The
112
+ `dpmsolver` type implements the algorithms in the [DPMSolver](https://huggingface.co/papers/2206.00927)
113
+ paper, and the `dpmsolver++` type implements the algorithms in the
114
+ [DPMSolver++](https://huggingface.co/papers/2211.01095) paper. It is recommended to use `dpmsolver++` or
115
+ `sde-dpmsolver++` with `solver_order=2` for guided sampling like in Stable Diffusion.
116
+ solver_type (`str`, defaults to `midpoint`):
117
+ Solver type for the second-order solver; can be `midpoint` or `heun`. The solver type slightly affects the
118
+ sample quality, especially for a small number of steps. It is recommended to use `midpoint` solvers.
119
+ lower_order_final (`bool`, defaults to `True`):
120
+ Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can
121
+ stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10.
135
122
  use_karras_sigmas (`bool`, *optional*, defaults to `False`):
136
- This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
137
- noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
138
- of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
139
- lambda_min_clipped (`float`, default `-inf`):
140
- the clipping threshold for the minimum value of lambda(t) for numerical stability. This is critical for
141
- cosine (squaredcos_cap_v2) noise schedule.
123
+ Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
124
+ the sigmas are determined according to a sequence of noise levels {σi}.
125
+ lambda_min_clipped (`float`, defaults to `-inf`):
126
+ Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
127
+ cosine (`squaredcos_cap_v2`) noise schedule.
142
128
  variance_type (`str`, *optional*):
143
- Set to "learned" or "learned_range" for diffusion models that predict variance. For example, OpenAI's
144
- guided-diffusion (https://github.com/openai/guided-diffusion) predicts both mean and variance of the
145
- Gaussian distribution in the model's output. DPM-Solver only needs the "mean" output because it is based on
146
- diffusion ODEs. whether the model's output contains the predicted Gaussian variance. For example, OpenAI's
147
- guided-diffusion (https://github.com/openai/guided-diffusion) predicts both mean and variance of the
148
- Gaussian distribution in the model's output. DPM-Solver only needs the "mean" output because it is based on
149
- diffusion ODEs.
150
-
129
+ Set to "learned" or "learned_range" for diffusion models that predict variance. If set, the model's output
130
+ contains the predicted Gaussian variance.
151
131
  """
152
132
 
153
133
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -224,7 +204,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
224
204
 
225
205
  Args:
226
206
  num_inference_steps (`int`):
227
- the number of diffusion steps used when generating samples with a pre-trained model.
207
+ The number of diffusion steps used when generating samples with a pre-trained model.
228
208
  """
229
209
  steps = num_inference_steps
230
210
  order = self.config.solver_order
@@ -254,13 +234,13 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
254
234
 
255
235
  def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
256
236
  """
257
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
237
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
258
238
 
259
239
  Args:
260
240
  num_inference_steps (`int`):
261
- the number of diffusion steps used when generating samples with a pre-trained model.
262
- device (`str` or `torch.device`, optional):
263
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
241
+ The number of diffusion steps used when generating samples with a pre-trained model.
242
+ device (`str` or `torch.device`, *optional*):
243
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
264
244
  """
265
245
  self.num_inference_steps = num_inference_steps
266
246
  # Clipping the minimum of all lambda(t) for numerical stability.
@@ -371,23 +351,28 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
371
351
  self, model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor
372
352
  ) -> torch.FloatTensor:
373
353
  """
374
- Convert the model output to the corresponding type that the algorithm (DPM-Solver / DPM-Solver++) needs.
354
+ Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is
355
+ designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an
356
+ integral of the data prediction model.
357
+
358
+ <Tip>
375
359
 
376
- DPM-Solver is designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to
377
- discretize an integral of the data prediction model. So we need to first convert the model output to the
378
- corresponding type to match the algorithm.
360
+ The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both noise
361
+ prediction and data prediction models.
379
362
 
380
- Note that the algorithm type and the model type is decoupled. That is to say, we can use either DPM-Solver or
381
- DPM-Solver++ for both noise prediction model and data prediction model.
363
+ </Tip>
382
364
 
383
365
  Args:
384
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
385
- timestep (`int`): current discrete timestep in the diffusion chain.
366
+ model_output (`torch.FloatTensor`):
367
+ The direct output from the learned diffusion model.
368
+ timestep (`int`):
369
+ The current discrete timestep in the diffusion chain.
386
370
  sample (`torch.FloatTensor`):
387
- current instance of sample being created by diffusion process.
371
+ A current instance of a sample created by the diffusion process.
388
372
 
389
373
  Returns:
390
- `torch.FloatTensor`: the converted model output.
374
+ `torch.FloatTensor`:
375
+ The converted model output.
391
376
  """
392
377
  # DPM-Solver++ needs to solve an integral of the data prediction model.
393
378
  if self.config.algorithm_type == "dpmsolver++":
@@ -441,19 +426,21 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
441
426
  sample: torch.FloatTensor,
442
427
  ) -> torch.FloatTensor:
443
428
  """
444
- One step for the first-order DPM-Solver (equivalent to DDIM).
445
-
446
- See https://arxiv.org/abs/2206.00927 for the detailed derivation.
429
+ One step for the first-order DPMSolver (equivalent to DDIM).
447
430
 
448
431
  Args:
449
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
450
- timestep (`int`): current discrete timestep in the diffusion chain.
451
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
432
+ model_output (`torch.FloatTensor`):
433
+ The direct output from the learned diffusion model.
434
+ timestep (`int`):
435
+ The current discrete timestep in the diffusion chain.
436
+ prev_timestep (`int`):
437
+ The previous discrete timestep in the diffusion chain.
452
438
  sample (`torch.FloatTensor`):
453
- current instance of sample being created by diffusion process.
439
+ A current instance of a sample created by the diffusion process.
454
440
 
455
441
  Returns:
456
- `torch.FloatTensor`: the sample tensor at the previous timestep.
442
+ `torch.FloatTensor`:
443
+ The sample tensor at the previous timestep.
457
444
  """
458
445
  lambda_t, lambda_s = self.lambda_t[prev_timestep], self.lambda_t[timestep]
459
446
  alpha_t, alpha_s = self.alpha_t[prev_timestep], self.alpha_t[timestep]
@@ -473,20 +460,22 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
473
460
  sample: torch.FloatTensor,
474
461
  ) -> torch.FloatTensor:
475
462
  """
476
- One step for the second-order singlestep DPM-Solver.
477
-
478
- It computes the solution at time `prev_timestep` from the time `timestep_list[-2]`.
463
+ One step for the second-order singlestep DPMSolver that computes the solution at time `prev_timestep` from the
464
+ time `timestep_list[-2]`.
479
465
 
480
466
  Args:
481
467
  model_output_list (`List[torch.FloatTensor]`):
482
- direct outputs from learned diffusion model at current and latter timesteps.
483
- timestep (`int`): current and latter discrete timestep in the diffusion chain.
484
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
468
+ The direct outputs from the learned diffusion model at current and latter timesteps.
469
+ timestep (`int`):
470
+ The current and latter discrete timestep in the diffusion chain.
471
+ prev_timestep (`int`):
472
+ The previous discrete timestep in the diffusion chain.
485
473
  sample (`torch.FloatTensor`):
486
- current instance of sample being created by diffusion process.
474
+ A current instance of a sample created by the diffusion process.
487
475
 
488
476
  Returns:
489
- `torch.FloatTensor`: the sample tensor at the previous timestep.
477
+ `torch.FloatTensor`:
478
+ The sample tensor at the previous timestep.
490
479
  """
491
480
  t, s0, s1 = prev_timestep, timestep_list[-1], timestep_list[-2]
492
481
  m0, m1 = model_output_list[-1], model_output_list[-2]
@@ -534,20 +523,22 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
534
523
  sample: torch.FloatTensor,
535
524
  ) -> torch.FloatTensor:
536
525
  """
537
- One step for the third-order singlestep DPM-Solver.
538
-
539
- It computes the solution at time `prev_timestep` from the time `timestep_list[-3]`.
526
+ One step for the third-order singlestep DPMSolver that computes the solution at time `prev_timestep` from the
527
+ time `timestep_list[-3]`.
540
528
 
541
529
  Args:
542
530
  model_output_list (`List[torch.FloatTensor]`):
543
- direct outputs from learned diffusion model at current and latter timesteps.
544
- timestep (`int`): current and latter discrete timestep in the diffusion chain.
545
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
531
+ The direct outputs from the learned diffusion model at current and latter timesteps.
532
+ timestep (`int`):
533
+ The current and latter discrete timestep in the diffusion chain.
534
+ prev_timestep (`int`):
535
+ The previous discrete timestep in the diffusion chain.
546
536
  sample (`torch.FloatTensor`):
547
- current instance of sample being created by diffusion process.
537
+ A current instance of a sample created by the diffusion process.
548
538
 
549
539
  Returns:
550
- `torch.FloatTensor`: the sample tensor at the previous timestep.
540
+ `torch.FloatTensor`:
541
+ The sample tensor at the previous timestep.
551
542
  """
552
543
  t, s0, s1, s2 = prev_timestep, timestep_list[-1], timestep_list[-2], timestep_list[-3]
553
544
  m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3]
@@ -606,20 +597,23 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
606
597
  order: int,
607
598
  ) -> torch.FloatTensor:
608
599
  """
609
- One step for the singlestep DPM-Solver.
600
+ One step for the singlestep DPMSolver.
610
601
 
611
602
  Args:
612
603
  model_output_list (`List[torch.FloatTensor]`):
613
- direct outputs from learned diffusion model at current and latter timesteps.
614
- timestep (`int`): current and latter discrete timestep in the diffusion chain.
615
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
604
+ The direct outputs from the learned diffusion model at current and latter timesteps.
605
+ timestep (`int`):
606
+ The current and latter discrete timestep in the diffusion chain.
607
+ prev_timestep (`int`):
608
+ The previous discrete timestep in the diffusion chain.
616
609
  sample (`torch.FloatTensor`):
617
- current instance of sample being created by diffusion process.
610
+ A current instance of a sample created by the diffusion process.
618
611
  order (`int`):
619
- the solver order at this step.
612
+ The solver order at this step.
620
613
 
621
614
  Returns:
622
- `torch.FloatTensor`: the sample tensor at the previous timestep.
615
+ `torch.FloatTensor`:
616
+ The sample tensor at the previous timestep.
623
617
  """
624
618
  if order == 1:
625
619
  return self.dpm_solver_first_order_update(model_output_list[-1], timestep_list[-1], prev_timestep, sample)
@@ -642,18 +636,23 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
642
636
  return_dict: bool = True,
643
637
  ) -> Union[SchedulerOutput, Tuple]:
644
638
  """
645
- Step function propagating the sample with the singlestep DPM-Solver.
639
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
640
+ the singlestep DPMSolver.
646
641
 
647
642
  Args:
648
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
649
- timestep (`int`): current discrete timestep in the diffusion chain.
643
+ model_output (`torch.FloatTensor`):
644
+ The direct output from the learned diffusion model.
645
+ timestep (`int`):
646
+ The current discrete timestep in the diffusion chain.
650
647
  sample (`torch.FloatTensor`):
651
- current instance of sample being created by diffusion process.
652
- return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
648
+ A current instance of a sample created by the diffusion process.
649
+ return_dict (`bool`):
650
+ Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.
653
651
 
654
652
  Returns:
655
- [`~scheduling_utils.SchedulerOutput`] or `tuple`: [`~scheduling_utils.SchedulerOutput`] if `return_dict` is
656
- True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
653
+ [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
654
+ If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
655
+ tuple is returned where the first element is the sample tensor.
657
656
 
658
657
  """
659
658
  if self.num_inference_steps is None:
@@ -702,10 +701,12 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
702
701
  current timestep.
703
702
 
704
703
  Args:
705
- sample (`torch.FloatTensor`): input sample
704
+ sample (`torch.FloatTensor`):
705
+ The input sample.
706
706
 
707
707
  Returns:
708
- `torch.FloatTensor`: scaled input sample
708
+ `torch.FloatTensor`:
709
+ A scaled input sample.
709
710
  """
710
711
  return sample
711
712
 
@@ -31,14 +31,14 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
31
31
  # Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->EulerAncestralDiscrete
32
32
  class EulerAncestralDiscreteSchedulerOutput(BaseOutput):
33
33
  """
34
- Output class for the scheduler's step function output.
34
+ Output class for the scheduler's `step` function output.
35
35
 
36
36
  Args:
37
37
  prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
38
- Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
38
+ Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
39
39
  denoising loop.
40
40
  pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
41
- The predicted denoised sample (x_{0}) based on the model output from the current timestep.
41
+ The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
42
42
  `pred_original_sample` can be used to preview progress or for guidance.
43
43
  """
44
44
 
@@ -93,34 +93,34 @@ def betas_for_alpha_bar(
93
93
 
94
94
  class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
95
95
  """
96
- Ancestral sampling with Euler method steps. Based on the original k-diffusion implementation by Katherine Crowson:
97
- https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72
96
+ Ancestral sampling with Euler method steps.
98
97
 
99
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
100
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
101
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
102
- [`~SchedulerMixin.from_pretrained`] functions.
98
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
99
+ methods the library implements for all schedulers such as loading and saving.
103
100
 
104
101
  Args:
105
- num_train_timesteps (`int`): number of diffusion steps used to train the model.
106
- beta_start (`float`): the starting `beta` value of inference.
107
- beta_end (`float`): the final `beta` value.
108
- beta_schedule (`str`):
109
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
102
+ num_train_timesteps (`int`, defaults to 1000):
103
+ The number of diffusion steps to train the model.
104
+ beta_start (`float`, defaults to 0.0001):
105
+ The starting `beta` value of inference.
106
+ beta_end (`float`, defaults to 0.02):
107
+ The final `beta` value.
108
+ beta_schedule (`str`, defaults to `"linear"`):
109
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
110
110
  `linear` or `scaled_linear`.
111
- trained_betas (`np.ndarray`, optional):
112
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
113
- prediction_type (`str`, default `epsilon`, optional):
114
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
115
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
116
- https://imagen.research.google/video/paper.pdf)
117
- timestep_spacing (`str`, default `"linspace"`):
118
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
119
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
120
- steps_offset (`int`, default `0`):
121
- an offset added to the inference steps. You can use a combination of `offset=1` and
122
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
123
- stable diffusion.
111
+ trained_betas (`np.ndarray`, *optional*):
112
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
113
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
114
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
115
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
116
+ Video](https://imagen.research.google/video/paper.pdf) paper).
117
+ timestep_spacing (`str`, defaults to `"linspace"`):
118
+ The way the timesteps should be scaled. Refer to Table 2 of [Common Diffusion Noise Schedules and
119
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
120
+ steps_offset (`int`, defaults to 0):
121
+ An offset added to the inference steps. You can use a combination of `steps_offset=1` and
122
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
123
+ Diffusion.
124
124
  """
125
125
 
126
126
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -178,14 +178,18 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
178
178
  self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
179
179
  ) -> torch.FloatTensor:
180
180
  """
181
- Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm.
181
+ Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
182
+ current timestep. Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm.
182
183
 
183
184
  Args:
184
- sample (`torch.FloatTensor`): input sample
185
- timestep (`float` or `torch.FloatTensor`): the current timestep in the diffusion chain
185
+ sample (`torch.FloatTensor`):
186
+ The input sample.
187
+ timestep (`float` or `torch.FloatTensor`):
188
+ The current timestep in the diffusion chain.
186
189
 
187
190
  Returns:
188
- `torch.FloatTensor`: scaled input sample
191
+ `torch.FloatTensor`:
192
+ A scaled input sample.
189
193
  """
190
194
  if isinstance(timestep, torch.Tensor):
191
195
  timestep = timestep.to(self.timesteps.device)
@@ -197,13 +201,13 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
197
201
 
198
202
  def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
199
203
  """
200
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
204
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
201
205
 
202
206
  Args:
203
207
  num_inference_steps (`int`):
204
- the number of diffusion steps used when generating samples with a pre-trained model.
205
- device (`str` or `torch.device`, optional):
206
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
208
+ The number of diffusion steps used when generating samples with a pre-trained model.
209
+ device (`str` or `torch.device`, *optional*):
210
+ The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
207
211
  """
208
212
  self.num_inference_steps = num_inference_steps
209
213
 
@@ -248,21 +252,27 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
248
252
  return_dict: bool = True,
249
253
  ) -> Union[EulerAncestralDiscreteSchedulerOutput, Tuple]:
250
254
  """
251
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
255
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
252
256
  process from the learned model outputs (most often the predicted noise).
253
257
 
254
258
  Args:
255
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
256
- timestep (`float`): current timestep in the diffusion chain.
259
+ model_output (`torch.FloatTensor`):
260
+ The direct output from the learned diffusion model.
261
+ timestep (`float`):
262
+ The current discrete timestep in the diffusion chain.
257
263
  sample (`torch.FloatTensor`):
258
- current instance of sample being created by diffusion process.
259
- generator (`torch.Generator`, optional): Random number generator.
260
- return_dict (`bool`): option for returning tuple rather than EulerAncestralDiscreteSchedulerOutput class
264
+ A current instance of a sample created by the diffusion process.
265
+ generator (`torch.Generator`, *optional*):
266
+ A random number generator.
267
+ return_dict (`bool`):
268
+ Whether or not to return a
269
+ [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or tuple.
261
270
 
262
271
  Returns:
263
- [`~schedulers.scheduling_utils.EulerAncestralDiscreteSchedulerOutput`] or `tuple`:
264
- [`~schedulers.scheduling_utils.EulerAncestralDiscreteSchedulerOutput`] if `return_dict` is True, otherwise
265
- a `tuple`. When returning a tuple, the first element is the sample tensor.
272
+ [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or `tuple`:
273
+ If `return_dict` is `True`,
274
+ [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] is returned,
275
+ otherwise a tuple is returned where the first element is the sample tensor.
266
276
 
267
277
  """
268
278