diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -72,79 +72,66 @@ def betas_for_alpha_bar(
72
72
 
73
73
  class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
74
74
  """
75
- DPMSolverMultistepInverseScheduler is the reverse scheduler of [`DPMSolverMultistepScheduler`].
75
+ `DPMSolverMultistepInverseScheduler` is the reverse scheduler of [`DPMSolverMultistepScheduler`].
76
76
 
77
- We also support the "dynamic thresholding" method in Imagen (https://arxiv.org/abs/2205.11487). For pixel-space
78
- diffusion models, you can set both `algorithm_type="dpmsolver++"` and `thresholding=True` to use the dynamic
79
- thresholding. Note that the thresholding method is unsuitable for latent-space diffusion models (such as
80
- stable-diffusion).
81
-
82
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
83
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
84
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
85
- [`~SchedulerMixin.from_pretrained`] functions.
77
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
78
+ methods the library implements for all schedulers such as loading and saving.
86
79
 
87
80
  Args:
88
- num_train_timesteps (`int`): number of diffusion steps used to train the model.
89
- beta_start (`float`): the starting `beta` value of inference.
90
- beta_end (`float`): the final `beta` value.
91
- beta_schedule (`str`):
92
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
81
+ num_train_timesteps (`int`, defaults to 1000):
82
+ The number of diffusion steps to train the model.
83
+ beta_start (`float`, defaults to 0.0001):
84
+ The starting `beta` value of inference.
85
+ beta_end (`float`, defaults to 0.02):
86
+ The final `beta` value.
87
+ beta_schedule (`str`, defaults to `"linear"`):
88
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
93
89
  `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
94
- trained_betas (`np.ndarray`, optional):
95
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
96
- solver_order (`int`, default `2`):
97
- the order of DPM-Solver; can be `1` or `2` or `3`. We recommend to use `solver_order=2` for guided
90
+ trained_betas (`np.ndarray`, *optional*):
91
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
92
+ solver_order (`int`, defaults to 2):
93
+ The DPMSolver order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided
98
94
  sampling, and `solver_order=3` for unconditional sampling.
99
- prediction_type (`str`, default `epsilon`, optional):
100
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
101
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
102
- https://imagen.research.google/video/paper.pdf)
103
- thresholding (`bool`, default `False`):
104
- whether to use the "dynamic thresholding" method (introduced by Imagen, https://arxiv.org/abs/2205.11487).
105
- For pixel-space diffusion models, you can set both `algorithm_type=dpmsolver++` and `thresholding=True` to
106
- use the dynamic thresholding. Note that the thresholding method is unsuitable for latent-space diffusion
107
- models (such as stable-diffusion).
108
- dynamic_thresholding_ratio (`float`, default `0.995`):
109
- the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
110
- (https://arxiv.org/abs/2205.11487).
111
- sample_max_value (`float`, default `1.0`):
112
- the threshold value for dynamic thresholding. Valid only when `thresholding=True` and
113
- `algorithm_type="dpmsolver++`.
114
- algorithm_type (`str`, default `dpmsolver++`):
115
- the algorithm type for the solver. Either `dpmsolver` or `dpmsolver++` or `sde-dpmsolver` or
116
- `sde-dpmsolver++`. The `dpmsolver` type implements the algorithms in https://arxiv.org/abs/2206.00927, and
117
- the `dpmsolver++` type implements the algorithms in https://arxiv.org/abs/2211.01095. We recommend to use
118
- `dpmsolver++` or `sde-dpmsolver++` with `solver_order=2` for guided sampling (e.g. stable-diffusion).
119
- solver_type (`str`, default `midpoint`):
120
- the solver type for the second-order solver. Either `midpoint` or `heun`. The solver type slightly affects
121
- the sample quality, especially for small number of steps. We empirically find that `midpoint` solvers are
122
- slightly better, so we recommend to use the `midpoint` type.
123
- lower_order_final (`bool`, default `True`):
124
- whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. We empirically
125
- find this trick can stabilize the sampling of DPM-Solver for steps < 15, especially for steps <= 10.
95
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
96
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
97
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
98
+ Video](https://imagen.research.google/video/paper.pdf) paper).
99
+ thresholding (`bool`, defaults to `False`):
100
+ Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
101
+ as Stable Diffusion.
102
+ dynamic_thresholding_ratio (`float`, defaults to 0.995):
103
+ The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
104
+ sample_max_value (`float`, defaults to 1.0):
105
+ The threshold value for dynamic thresholding. Valid only when `thresholding=True` and
106
+ `algorithm_type="dpmsolver++"`.
107
+ algorithm_type (`str`, defaults to `dpmsolver++`):
108
+ Algorithm type for the solver; can be `dpmsolver`, `dpmsolver++`, `sde-dpmsolver` or `sde-dpmsolver++`. The
109
+ `dpmsolver` type implements the algorithms in the [DPMSolver](https://huggingface.co/papers/2206.00927)
110
+ paper, and the `dpmsolver++` type implements the algorithms in the
111
+ [DPMSolver++](https://huggingface.co/papers/2211.01095) paper. It is recommended to use `dpmsolver++` or
112
+ `sde-dpmsolver++` with `solver_order=2` for guided sampling like in Stable Diffusion.
113
+ solver_type (`str`, defaults to `midpoint`):
114
+ Solver type for the second-order solver; can be `midpoint` or `heun`. The solver type slightly affects the
115
+ sample quality, especially for a small number of steps. It is recommended to use `midpoint` solvers.
116
+ lower_order_final (`bool`, defaults to `True`):
117
+ Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can
118
+ stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10.
126
119
  use_karras_sigmas (`bool`, *optional*, defaults to `False`):
127
- This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
128
- noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
129
- of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
130
- lambda_min_clipped (`float`, default `-inf`):
131
- the clipping threshold for the minimum value of lambda(t) for numerical stability. This is critical for
132
- cosine (squaredcos_cap_v2) noise schedule.
120
+ Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
121
+ the sigmas are determined according to a sequence of noise levels {σi}.
122
+ lambda_min_clipped (`float`, defaults to `-inf`):
123
+ Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
124
+ cosine (`squaredcos_cap_v2`) noise schedule.
133
125
  variance_type (`str`, *optional*):
134
- Set to "learned" or "learned_range" for diffusion models that predict variance. For example, OpenAI's
135
- guided-diffusion (https://github.com/openai/guided-diffusion) predicts both mean and variance of the
136
- Gaussian distribution in the model's output. DPM-Solver only needs the "mean" output because it is based on
137
- diffusion ODEs. whether the model's output contains the predicted Gaussian variance. For example, OpenAI's
138
- guided-diffusion (https://github.com/openai/guided-diffusion) predicts both mean and variance of the
139
- Gaussian distribution in the model's output. DPM-Solver only needs the "mean" output because it is based on
140
- diffusion ODEs.
141
- timestep_spacing (`str`, default `"linspace"`):
142
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
143
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
144
- steps_offset (`int`, default `0`):
145
- an offset added to the inference steps. You can use a combination of `offset=1` and
146
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
147
- stable diffusion.
126
+ Set to "learned" or "learned_range" for diffusion models that predict variance. If set, the model's output
127
+ contains the predicted Gaussian variance.
128
+ timestep_spacing (`str`, defaults to `"linspace"`):
129
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
130
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
131
+ steps_offset (`int`, defaults to 0):
132
+ An offset added to the inference steps. You can use a combination of `offset=1` and
133
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
134
+ Diffusion.
148
135
  """
149
136
 
150
137
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -220,13 +207,13 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
220
207
 
221
208
  def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torch.device] = None):
222
209
  """
223
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
210
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
224
211
 
225
212
  Args:
226
213
  num_inference_steps (`int`):
227
- the number of diffusion steps used when generating samples with a pre-trained model.
228
- device (`str` or `torch.device`, optional):
229
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
214
+ The number of diffusion steps used when generating samples with a pre-trained model.
215
+ device (`str` or `torch.device`, *optional*):
216
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
230
217
  """
231
218
  # Clipping the minimum of all lambda(t) for numerical stability.
232
219
  # This is critical for cosine (squaredcos_cap_v2) noise schedule.
@@ -357,23 +344,28 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
357
344
  self, model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor
358
345
  ) -> torch.FloatTensor:
359
346
  """
360
- Convert the model output to the corresponding type that the algorithm (DPM-Solver / DPM-Solver++) needs.
347
+ Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is
348
+ designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an
349
+ integral of the data prediction model.
350
+
351
+ <Tip>
361
352
 
362
- DPM-Solver is designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to
363
- discretize an integral of the data prediction model. So we need to first convert the model output to the
364
- corresponding type to match the algorithm.
353
+ The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both noise
354
+ prediction and data prediction models.
365
355
 
366
- Note that the algorithm type and the model type is decoupled. That is to say, we can use either DPM-Solver or
367
- DPM-Solver++ for both noise prediction model and data prediction model.
356
+ </Tip>
368
357
 
369
358
  Args:
370
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
371
- timestep (`int`): current discrete timestep in the diffusion chain.
359
+ model_output (`torch.FloatTensor`):
360
+ The direct output from the learned diffusion model.
361
+ timestep (`int`):
362
+ The current discrete timestep in the diffusion chain.
372
363
  sample (`torch.FloatTensor`):
373
- current instance of sample being created by diffusion process.
364
+ A current instance of a sample created by the diffusion process.
374
365
 
375
366
  Returns:
376
- `torch.FloatTensor`: the converted model output.
367
+ `torch.FloatTensor`:
368
+ The converted model output.
377
369
  """
378
370
 
379
371
  # DPM-Solver++ needs to solve an integral of the data prediction model.
@@ -437,19 +429,21 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
437
429
  noise: Optional[torch.FloatTensor] = None,
438
430
  ) -> torch.FloatTensor:
439
431
  """
440
- One step for the first-order DPM-Solver (equivalent to DDIM).
441
-
442
- See https://arxiv.org/abs/2206.00927 for the detailed derivation.
432
+ One step for the first-order DPMSolver (equivalent to DDIM).
443
433
 
444
434
  Args:
445
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
446
- timestep (`int`): current discrete timestep in the diffusion chain.
447
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
435
+ model_output (`torch.FloatTensor`):
436
+ The direct output from the learned diffusion model.
437
+ timestep (`int`):
438
+ The current discrete timestep in the diffusion chain.
439
+ prev_timestep (`int`):
440
+ The previous discrete timestep in the diffusion chain.
448
441
  sample (`torch.FloatTensor`):
449
- current instance of sample being created by diffusion process.
442
+ A current instance of a sample created by the diffusion process.
450
443
 
451
444
  Returns:
452
- `torch.FloatTensor`: the sample tensor at the previous timestep.
445
+ `torch.FloatTensor`:
446
+ The sample tensor at the previous timestep.
453
447
  """
454
448
  lambda_t, lambda_s = self.lambda_t[prev_timestep], self.lambda_t[timestep]
455
449
  alpha_t, alpha_s = self.alpha_t[prev_timestep], self.alpha_t[timestep]
@@ -474,18 +468,21 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
474
468
  noise: Optional[torch.FloatTensor] = None,
475
469
  ) -> torch.FloatTensor:
476
470
  """
477
- One step for the second-order multistep DPM-Solver.
471
+ One step for the second-order multistep DPMSolver.
478
472
 
479
473
  Args:
480
474
  model_output_list (`List[torch.FloatTensor]`):
481
- direct outputs from learned diffusion model at current and latter timesteps.
482
- timestep (`int`): current and latter discrete timestep in the diffusion chain.
483
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
475
+ The direct outputs from the learned diffusion model at current and latter timesteps.
476
+ timestep (`int`):
477
+ The current and latter discrete timestep in the diffusion chain.
478
+ prev_timestep (`int`):
479
+ The previous discrete timestep in the diffusion chain.
484
480
  sample (`torch.FloatTensor`):
485
- current instance of sample being created by diffusion process.
481
+ A current instance of a sample created by the diffusion process.
486
482
 
487
483
  Returns:
488
- `torch.FloatTensor`: the sample tensor at the previous timestep.
484
+ `torch.FloatTensor`:
485
+ The sample tensor at the previous timestep.
489
486
  """
490
487
  t, s0, s1 = prev_timestep, timestep_list[-1], timestep_list[-2]
491
488
  m0, m1 = model_output_list[-1], model_output_list[-2]
@@ -538,18 +535,21 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
538
535
  sample: torch.FloatTensor,
539
536
  ) -> torch.FloatTensor:
540
537
  """
541
- One step for the third-order multistep DPM-Solver.
538
+ One step for the third-order multistep DPMSolver.
542
539
 
543
540
  Args:
544
541
  model_output_list (`List[torch.FloatTensor]`):
545
- direct outputs from learned diffusion model at current and latter timesteps.
546
- timestep (`int`): current and latter discrete timestep in the diffusion chain.
547
- prev_timestep (`int`): previous discrete timestep in the diffusion chain.
542
+ The direct outputs from the learned diffusion model at current and latter timesteps.
543
+ timestep (`int`):
544
+ The current and latter discrete timestep in the diffusion chain.
545
+ prev_timestep (`int`):
546
+ The previous discrete timestep in the diffusion chain.
548
547
  sample (`torch.FloatTensor`):
549
- current instance of sample being created by diffusion process.
548
+ A current instance of a sample created by the diffusion process.
550
549
 
551
550
  Returns:
552
- `torch.FloatTensor`: the sample tensor at the previous timestep.
551
+ `torch.FloatTensor`:
552
+ The sample tensor at the previous timestep.
553
553
  """
554
554
  t, s0, s1, s2 = prev_timestep, timestep_list[-1], timestep_list[-2], timestep_list[-3]
555
555
  m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3]
@@ -594,18 +594,23 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
594
594
  return_dict: bool = True,
595
595
  ) -> Union[SchedulerOutput, Tuple]:
596
596
  """
597
- Step function propagating the sample with the multistep DPM-Solver.
597
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
598
+ the multistep DPMSolver.
598
599
 
599
600
  Args:
600
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
601
- timestep (`int`): current discrete timestep in the diffusion chain.
601
+ model_output (`torch.FloatTensor`):
602
+ The direct output from the learned diffusion model.
603
+ timestep (`int`):
604
+ The current discrete timestep in the diffusion chain.
602
605
  sample (`torch.FloatTensor`):
603
- current instance of sample being created by diffusion process.
604
- return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
606
+ A current instance of a sample created by the diffusion process.
607
+ return_dict (`bool`):
608
+ Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.
605
609
 
606
610
  Returns:
607
- [`~scheduling_utils.SchedulerOutput`] or `tuple`: [`~scheduling_utils.SchedulerOutput`] if `return_dict` is
608
- True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
611
+ [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
612
+ If `return_dict` is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
613
+ tuple is returned where the first element is the sample tensor.
609
614
 
610
615
  """
611
616
  if self.num_inference_steps is None:
@@ -672,10 +677,12 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
672
677
  current timestep.
673
678
 
674
679
  Args:
675
- sample (`torch.FloatTensor`): input sample
680
+ sample (`torch.FloatTensor`):
681
+ The input sample.
676
682
 
677
683
  Returns:
678
- `torch.FloatTensor`: scaled input sample
684
+ `torch.FloatTensor`:
685
+ A scaled input sample.
679
686
  """
680
687
  return sample
681
688
 
@@ -123,39 +123,40 @@ def betas_for_alpha_bar(
123
123
 
124
124
  class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
125
125
  """
126
- Implements Stochastic Sampler (Algorithm 2) from Karras et al. (2022). Based on the original k-diffusion
127
- implementation by Katherine Crowson:
128
- https://github.com/crowsonkb/k-diffusion/blob/41b4cb6df0506694a7776af31349acf082bf6091/k_diffusion/sampling.py#L543
126
+ DPMSolverSDEScheduler implements the stochastic sampler from the [Elucidating the Design Space of Diffusion-Based
127
+ Generative Models](https://huggingface.co/papers/2206.00364) paper.
129
128
 
130
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
131
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
132
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
133
- [`~SchedulerMixin.from_pretrained`] functions.
129
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
130
+ methods the library implements for all schedulers such as loading and saving.
134
131
 
135
132
  Args:
136
- num_train_timesteps (`int`): number of diffusion steps used to train the model. beta_start (`float`): the
137
- starting `beta` value of inference. beta_end (`float`): the final `beta` value. beta_schedule (`str`):
138
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
133
+ num_train_timesteps (`int`, defaults to 1000):
134
+ The number of diffusion steps to train the model.
135
+ beta_start (`float`, defaults to 0.00085):
136
+ The starting `beta` value of inference.
137
+ beta_end (`float`, defaults to 0.012):
138
+ The final `beta` value.
139
+ beta_schedule (`str`, defaults to `"linear"`):
140
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
139
141
  `linear` or `scaled_linear`.
140
- trained_betas (`np.ndarray`, optional):
141
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
142
- prediction_type (`str`, default `epsilon`, optional):
143
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
144
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
145
- https://imagen.research.google/video/paper.pdf)
142
+ trained_betas (`np.ndarray`, *optional*):
143
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
144
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
145
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
146
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
147
+ Video](https://imagen.research.google/video/paper.pdf) paper).
146
148
  use_karras_sigmas (`bool`, *optional*, defaults to `False`):
147
- This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
148
- noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
149
- of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
149
+ Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
150
+ the sigmas are determined according to a sequence of noise levels {σi}.
150
151
  noise_sampler_seed (`int`, *optional*, defaults to `None`):
151
- The random seed to use for the noise sampler. If `None`, a random seed will be generated.
152
- timestep_spacing (`str`, default `"linspace"`):
153
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
154
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
155
- steps_offset (`int`, default `0`):
156
- an offset added to the inference steps. You can use a combination of `offset=1` and
157
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
158
- stable diffusion.
152
+ The random seed to use for the noise sampler. If `None`, a random seed is generated.
153
+ timestep_spacing (`str`, defaults to `"linspace"`):
154
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
155
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
156
+ steps_offset (`int`, defaults to 0):
157
+ An offset added to the inference steps. You can use a combination of `offset=1` and
158
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
159
+ Diffusion.
159
160
  """
160
161
 
161
162
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -232,12 +233,18 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
232
233
  timestep: Union[float, torch.FloatTensor],
233
234
  ) -> torch.FloatTensor:
234
235
  """
235
- Args:
236
236
  Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
237
237
  current timestep.
238
- sample (`torch.FloatTensor`): input sample timestep (`int`, optional): current timestep
238
+
239
+ Args:
240
+ sample (`torch.FloatTensor`):
241
+ The input sample.
242
+ timestep (`float` or `torch.FloatTensor`):
243
+ The current timestep in the diffusion chain.
244
+
239
245
  Returns:
240
- `torch.FloatTensor`: scaled input sample
246
+ `torch.FloatTensor`:
247
+ A scaled input sample.
241
248
  """
242
249
  step_index = self.index_for_timestep(timestep)
243
250
 
@@ -253,13 +260,13 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
253
260
  num_train_timesteps: Optional[int] = None,
254
261
  ):
255
262
  """
256
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
263
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
257
264
 
258
265
  Args:
259
266
  num_inference_steps (`int`):
260
- the number of diffusion steps used when generating samples with a pre-trained model.
261
- device (`str` or `torch.device`, optional):
262
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
267
+ The number of diffusion steps used when generating samples with a pre-trained model.
268
+ device (`str` or `torch.device`, *optional*):
269
+ The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
263
270
  """
264
271
  self.num_inference_steps = num_inference_steps
265
272
 
@@ -384,18 +391,25 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
384
391
  s_noise: float = 1.0,
385
392
  ) -> Union[SchedulerOutput, Tuple]:
386
393
  """
387
- Args:
388
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
394
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
389
395
  process from the learned model outputs (most often the predicted noise).
390
- model_output (Union[torch.FloatTensor, np.ndarray]): Direct output from learned diffusion model.
391
- timestep (Union[float, torch.FloatTensor]): Current discrete timestep in the diffusion chain.
392
- sample (Union[torch.FloatTensor, np.ndarray]): Current instance of sample being created by diffusion process.
393
- return_dict (bool, optional): Option for returning tuple rather than SchedulerOutput class. Defaults to True.
394
- s_noise (float, optional): Scaling factor for the noise added to the sample. Defaults to 1.0.
396
+
397
+ Args:
398
+ model_output (`torch.FloatTensor` or `np.ndarray`):
399
+ The direct output from the learned diffusion model.
400
+ timestep (`float` or `torch.FloatTensor`):
401
+ The current discrete timestep in the diffusion chain.
402
+ sample (`torch.FloatTensor` or `np.ndarray`):
403
+ A current instance of a sample created by the diffusion process.
404
+ return_dict (`bool`, *optional*, defaults to `True`):
405
+ Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
406
+ s_noise (`float`, *optional*, defaults to 1.0):
407
+ Scaling factor for noise added to the sample.
408
+
395
409
  Returns:
396
410
  [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
397
- [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When
398
- returning a tuple, the first element is the sample tensor.
411
+ If `return_dict` is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
412
+ tuple is returned where the first element is the sample tensor.
399
413
  """
400
414
  step_index = self.index_for_timestep(timestep)
401
415