diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -31,14 +31,14 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
31
31
  # Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->EulerDiscrete
32
32
  class EulerDiscreteSchedulerOutput(BaseOutput):
33
33
  """
34
- Output class for the scheduler's step function output.
34
+ Output class for the scheduler's `step` function output.
35
35
 
36
36
  Args:
37
37
  prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
38
- Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
38
+ Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
39
39
  denoising loop.
40
40
  pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
41
- The predicted denoised sample (x_{0}) based on the model output from the current timestep.
41
+ The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
42
42
  `pred_original_sample` can be used to preview progress or for guidance.
43
43
  """
44
44
 
@@ -93,42 +93,40 @@ def betas_for_alpha_bar(
93
93
 
94
94
  class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
95
95
  """
96
- Euler scheduler (Algorithm 2) from Karras et al. (2022) https://arxiv.org/abs/2206.00364. . Based on the original
97
- k-diffusion implementation by Katherine Crowson:
98
- https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51
96
+ Euler scheduler.
99
97
 
100
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
101
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
102
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
103
- [`~SchedulerMixin.from_pretrained`] functions.
98
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
99
+ methods the library implements for all schedulers such as loading and saving.
104
100
 
105
101
  Args:
106
- num_train_timesteps (`int`): number of diffusion steps used to train the model.
107
- beta_start (`float`): the starting `beta` value of inference.
108
- beta_end (`float`): the final `beta` value.
109
- beta_schedule (`str`):
110
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
102
+ num_train_timesteps (`int`, defaults to 1000):
103
+ The number of diffusion steps to train the model.
104
+ beta_start (`float`, defaults to 0.0001):
105
+ The starting `beta` value of inference.
106
+ beta_end (`float`, defaults to 0.02):
107
+ The final `beta` value.
108
+ beta_schedule (`str`, defaults to `"linear"`):
109
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
111
110
  `linear` or `scaled_linear`.
112
- trained_betas (`np.ndarray`, optional):
113
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
114
- prediction_type (`str`, default `"epsilon"`, optional):
115
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
116
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
117
- https://imagen.research.google/video/paper.pdf)
118
- interpolation_type (`str`, default `"linear"`, optional):
119
- interpolation type to compute intermediate sigmas for the scheduler denoising steps. Should be one of
120
- [`"linear"`, `"log_linear"`].
111
+ trained_betas (`np.ndarray`, *optional*):
112
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
113
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
114
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
115
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
116
+ Video](https://imagen.research.google/video/paper.pdf) paper).
117
+ interpolation_type (`str`, defaults to `"linear"`, *optional*):
118
+ The interpolation type to compute intermediate sigmas for the scheduler denoising steps. Should be one of
119
+ `"linear"` or `"log_linear"`.
121
120
  use_karras_sigmas (`bool`, *optional*, defaults to `False`):
122
- This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
123
- noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
124
- of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
125
- timestep_spacing (`str`, default `"linspace"`):
126
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
127
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
128
- steps_offset (`int`, default `0`):
129
- an offset added to the inference steps. You can use a combination of `offset=1` and
130
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
131
- stable diffusion.
121
+ Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
122
+ the sigmas are determined according to a sequence of noise levels {σi}.
123
+ timestep_spacing (`str`, defaults to `"linspace"`):
124
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
125
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
126
+ steps_offset (`int`, defaults to 0):
127
+ An offset added to the inference steps. You can use a combination of `offset=1` and
128
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
129
+ Diffusion.
132
130
  """
133
131
 
134
132
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -189,14 +187,18 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
189
187
  self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
190
188
  ) -> torch.FloatTensor:
191
189
  """
192
- Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm.
190
+ Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
191
+ current timestep. Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm.
193
192
 
194
193
  Args:
195
- sample (`torch.FloatTensor`): input sample
196
- timestep (`float` or `torch.FloatTensor`): the current timestep in the diffusion chain
194
+ sample (`torch.FloatTensor`):
195
+ The input sample.
196
+ timestep (`float` or `torch.FloatTensor`):
197
+ The current timestep in the diffusion chain.
197
198
 
198
199
  Returns:
199
- `torch.FloatTensor`: scaled input sample
200
+ `torch.FloatTensor`:
201
+ A scaled input sample.
200
202
  """
201
203
  if isinstance(timestep, torch.Tensor):
202
204
  timestep = timestep.to(self.timesteps.device)
@@ -210,13 +212,13 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
210
212
 
211
213
  def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
212
214
  """
213
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
215
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
214
216
 
215
217
  Args:
216
218
  num_inference_steps (`int`):
217
- the number of diffusion steps used when generating samples with a pre-trained model.
218
- device (`str` or `torch.device`, optional):
219
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
219
+ The number of diffusion steps used when generating samples with a pre-trained model.
220
+ device (`str` or `torch.device`, *optional*):
221
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
220
222
  """
221
223
  self.num_inference_steps = num_inference_steps
222
224
 
@@ -317,26 +319,31 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
317
319
  return_dict: bool = True,
318
320
  ) -> Union[EulerDiscreteSchedulerOutput, Tuple]:
319
321
  """
320
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
322
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
321
323
  process from the learned model outputs (most often the predicted noise).
322
324
 
323
325
  Args:
324
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
325
- timestep (`float`): current timestep in the diffusion chain.
326
+ model_output (`torch.FloatTensor`):
327
+ The direct output from learned diffusion model.
328
+ timestep (`float`):
329
+ The current discrete timestep in the diffusion chain.
326
330
  sample (`torch.FloatTensor`):
327
- current instance of sample being created by diffusion process.
328
- s_churn (`float`)
329
- s_tmin (`float`)
330
- s_tmax (`float`)
331
- s_noise (`float`)
332
- generator (`torch.Generator`, optional): Random number generator.
333
- return_dict (`bool`): option for returning tuple rather than EulerDiscreteSchedulerOutput class
331
+ A current instance of a sample created by the diffusion process.
332
+ s_churn (`float`):
333
+ s_tmin (`float`):
334
+ s_tmax (`float`):
335
+ s_noise (`float`, defaults to 1.0):
336
+ Scaling factor for noise added to the sample.
337
+ generator (`torch.Generator`, *optional*):
338
+ A random number generator.
339
+ return_dict (`bool`):
340
+ Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or
341
+ tuple.
334
342
 
335
343
  Returns:
336
- [`~schedulers.scheduling_utils.EulerDiscreteSchedulerOutput`] or `tuple`:
337
- [`~schedulers.scheduling_utils.EulerDiscreteSchedulerOutput`] if `return_dict` is True, otherwise a
338
- `tuple`. When returning a tuple, the first element is the sample tensor.
339
-
344
+ [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or `tuple`:
345
+ If return_dict is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is
346
+ returned, otherwise a tuple is returned where the first element is the sample tensor.
340
347
  """
341
348
 
342
349
  if (
@@ -70,41 +70,41 @@ def betas_for_alpha_bar(
70
70
 
71
71
  class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
72
72
  """
73
- Implements Algorithm 2 (Heun steps) from Karras et al. (2022). for discrete beta schedules. Based on the original
74
- k-diffusion implementation by Katherine Crowson:
75
- https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L90
73
+ Scheduler with Heun steps for discrete beta schedules.
76
74
 
77
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
78
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
79
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
80
- [`~SchedulerMixin.from_pretrained`] functions.
75
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
76
+ methods the library implements for all schedulers such as loading and saving.
81
77
 
82
78
  Args:
83
- num_train_timesteps (`int`): number of diffusion steps used to train the model. beta_start (`float`): the
84
- starting `beta` value of inference. beta_end (`float`): the final `beta` value. beta_schedule (`str`):
85
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
79
+ num_train_timesteps (`int`, defaults to 1000):
80
+ The number of diffusion steps to train the model.
81
+ beta_start (`float`, defaults to 0.0001):
82
+ The starting `beta` value of inference.
83
+ beta_end (`float`, defaults to 0.02):
84
+ The final `beta` value.
85
+ beta_schedule (`str`, defaults to `"linear"`):
86
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
86
87
  `linear` or `scaled_linear`.
87
- trained_betas (`np.ndarray`, optional):
88
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
89
- prediction_type (`str`, default `epsilon`, optional):
90
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
91
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
92
- https://imagen.research.google/video/paper.pdf).
93
- clip_sample (`bool`, default `True`):
94
- option to clip predicted sample for numerical stability.
95
- clip_sample_range (`float`, default `1.0`):
96
- the maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
88
+ trained_betas (`np.ndarray`, *optional*):
89
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
90
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
91
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
92
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
93
+ Video](https://imagen.research.google/video/paper.pdf) paper).
94
+ clip_sample (`bool`, defaults to `True`):
95
+ Clip the predicted sample for numerical stability.
96
+ clip_sample_range (`float`, defaults to 1.0):
97
+ The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
97
98
  use_karras_sigmas (`bool`, *optional*, defaults to `False`):
98
- This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
99
- noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
100
- of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
101
- timestep_spacing (`str`, default `"linspace"`):
102
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
103
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
104
- steps_offset (`int`, default `0`):
105
- an offset added to the inference steps. You can use a combination of `offset=1` and
106
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
107
- stable diffusion.
99
+ Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
100
+ the sigmas are determined according to a sequence of noise levels {σi}.
101
+ timestep_spacing (`str`, defaults to `"linspace"`):
102
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
103
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
104
+ steps_offset (`int`, defaults to 0):
105
+ An offset added to the inference steps. You can use a combination of `offset=1` and
106
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
107
+ Diffusion.
108
108
  """
109
109
 
110
110
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -181,12 +181,18 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
181
181
  timestep: Union[float, torch.FloatTensor],
182
182
  ) -> torch.FloatTensor:
183
183
  """
184
- Args:
185
184
  Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
186
185
  current timestep.
187
- sample (`torch.FloatTensor`): input sample timestep (`int`, optional): current timestep
186
+
187
+ Args:
188
+ sample (`torch.FloatTensor`):
189
+ The input sample.
190
+ timestep (`float` or `torch.FloatTensor`):
191
+ The current timestep in the diffusion chain.
192
+
188
193
  Returns:
189
- `torch.FloatTensor`: scaled input sample
194
+ `torch.FloatTensor`:
195
+ A scaled input sample.
190
196
  """
191
197
  step_index = self.index_for_timestep(timestep)
192
198
 
@@ -201,13 +207,13 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
201
207
  num_train_timesteps: Optional[int] = None,
202
208
  ):
203
209
  """
204
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
210
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
205
211
 
206
212
  Args:
207
213
  num_inference_steps (`int`):
208
- the number of diffusion steps used when generating samples with a pre-trained model.
209
- device (`str` or `torch.device`, optional):
210
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
214
+ The number of diffusion steps used when generating samples with a pre-trained model.
215
+ device (`str` or `torch.device`, *optional*):
216
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
211
217
  """
212
218
  self.num_inference_steps = num_inference_steps
213
219
 
@@ -312,17 +318,23 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
312
318
  return_dict: bool = True,
313
319
  ) -> Union[SchedulerOutput, Tuple]:
314
320
  """
315
- Args:
316
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
321
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
317
322
  process from the learned model outputs (most often the predicted noise).
318
- model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. timestep
319
- (`int`): current discrete timestep in the diffusion chain. sample (`torch.FloatTensor` or `np.ndarray`):
320
- current instance of sample being created by diffusion process.
321
- return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
323
+
324
+ Args:
325
+ model_output (`torch.FloatTensor`):
326
+ The direct output from learned diffusion model.
327
+ timestep (`float`):
328
+ The current discrete timestep in the diffusion chain.
329
+ sample (`torch.FloatTensor`):
330
+ A current instance of a sample created by the diffusion process.
331
+ return_dict (`bool`):
332
+ Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
333
+
322
334
  Returns:
323
335
  [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
324
- [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When
325
- returning a tuple, the first element is the sample tensor.
336
+ If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
337
+ tuple is returned where the first element is the sample tensor.
326
338
  """
327
339
  step_index = self.index_for_timestep(timestep)
328
340
 
@@ -24,18 +24,16 @@ from .scheduling_utils import SchedulerMixin, SchedulerOutput
24
24
 
25
25
  class IPNDMScheduler(SchedulerMixin, ConfigMixin):
26
26
  """
27
- Improved Pseudo numerical methods for diffusion models (iPNDM) ported from @crowsonkb's amazing k-diffusion
28
- [library](https://github.com/crowsonkb/v-diffusion-pytorch/blob/987f8985e38208345c1959b0ea767a625831cc9b/diffusion/sampling.py#L296)
27
+ A fourth-order Improved Pseudo Linear Multistep scheduler.
29
28
 
30
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
31
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
32
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
33
- [`~SchedulerMixin.from_pretrained`] functions.
34
-
35
- For more details, see the original paper: https://arxiv.org/abs/2202.09778
29
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
30
+ methods the library implements for all schedulers such as loading and saving.
36
31
 
37
32
  Args:
38
- num_train_timesteps (`int`): number of diffusion steps used to train the model.
33
+ num_train_timesteps (`int`, defaults to 1000):
34
+ The number of diffusion steps to train the model.
35
+ trained_betas (`np.ndarray`, *optional*):
36
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
39
37
  """
40
38
 
41
39
  order = 1
@@ -60,11 +58,13 @@ class IPNDMScheduler(SchedulerMixin, ConfigMixin):
60
58
 
61
59
  def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
62
60
  """
63
- Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
61
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
64
62
 
65
63
  Args:
66
64
  num_inference_steps (`int`):
67
- the number of diffusion steps used when generating samples with a pre-trained model.
65
+ The number of diffusion steps used when generating samples with a pre-trained model.
66
+ device (`str` or `torch.device`, *optional*):
67
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
68
68
  """
69
69
  self.num_inference_steps = num_inference_steps
70
70
  steps = torch.linspace(1, 0, num_inference_steps + 1)[:-1]
@@ -90,20 +90,23 @@ class IPNDMScheduler(SchedulerMixin, ConfigMixin):
90
90
  return_dict: bool = True,
91
91
  ) -> Union[SchedulerOutput, Tuple]:
92
92
  """
93
- Step function propagating the sample with the linear multi-step method. This has one forward pass with multiple
94
- times to approximate the solution.
93
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
94
+ the linear multistep method. It performs one forward pass multiple times to approximate the solution.
95
95
 
96
96
  Args:
97
- model_output (`torch.FloatTensor`): direct output from learned diffusion model.
98
- timestep (`int`): current discrete timestep in the diffusion chain.
97
+ model_output (`torch.FloatTensor`):
98
+ The direct output from learned diffusion model.
99
+ timestep (`int`):
100
+ The current discrete timestep in the diffusion chain.
99
101
  sample (`torch.FloatTensor`):
100
- current instance of sample being created by diffusion process.
101
- return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
102
+ A current instance of a sample created by the diffusion process.
103
+ return_dict (`bool`):
104
+ Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
102
105
 
103
106
  Returns:
104
- [`~scheduling_utils.SchedulerOutput`] or `tuple`: [`~scheduling_utils.SchedulerOutput`] if `return_dict` is
105
- True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
106
-
107
+ [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
108
+ If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
109
+ tuple is returned where the first element is the sample tensor.
107
110
  """
108
111
  if self.num_inference_steps is None:
109
112
  raise ValueError(
@@ -138,10 +141,12 @@ class IPNDMScheduler(SchedulerMixin, ConfigMixin):
138
141
  current timestep.
139
142
 
140
143
  Args:
141
- sample (`torch.FloatTensor`): input sample
144
+ sample (`torch.FloatTensor`):
145
+ The input sample.
142
146
 
143
147
  Returns:
144
- `torch.FloatTensor`: scaled input sample
148
+ `torch.FloatTensor`:
149
+ A scaled input sample.
145
150
  """
146
151
  return sample
147
152
 
@@ -71,36 +71,35 @@ def betas_for_alpha_bar(
71
71
 
72
72
  class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
73
73
  """
74
- Scheduler created by @crowsonkb in [k_diffusion](https://github.com/crowsonkb/k-diffusion), see:
75
- https://github.com/crowsonkb/k-diffusion/blob/5b3af030dd83e0297272d861c19477735d0317ec/k_diffusion/sampling.py#L188
74
+ KDPM2DiscreteScheduler with ancestral sampling is inspired by the DPMSolver2 and Algorithm 2 from the [Elucidating
75
+ the Design Space of Diffusion-Based Generative Models](https://huggingface.co/papers/2206.00364) paper.
76
76
 
77
- Scheduler inspired by DPM-Solver-2 and Algorthim 2 from Karras et al. (2022).
78
-
79
- [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
80
- function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
81
- [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
82
- [`~SchedulerMixin.from_pretrained`] functions.
77
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
78
+ methods the library implements for all schedulers such as loading and saving.
83
79
 
84
80
  Args:
85
- num_train_timesteps (`int`): number of diffusion steps used to train the model. beta_start (`float`): the
86
- starting `beta` value of inference. beta_end (`float`): the final `beta` value. beta_schedule (`str`):
87
- the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
81
+ num_train_timesteps (`int`, defaults to 1000):
82
+ The number of diffusion steps to train the model.
83
+ beta_start (`float`, defaults to 0.00085):
84
+ The starting `beta` value of inference.
85
+ beta_end (`float`, defaults to 0.012):
86
+ The final `beta` value.
87
+ beta_schedule (`str`, defaults to `"linear"`):
88
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
88
89
  `linear` or `scaled_linear`.
89
- trained_betas (`np.ndarray`, optional):
90
- option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
91
- options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`,
92
- `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
93
- prediction_type (`str`, default `epsilon`, optional):
94
- prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
95
- process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
96
- https://imagen.research.google/video/paper.pdf)
97
- timestep_spacing (`str`, default `"linspace"`):
98
- The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
99
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
100
- steps_offset (`int`, default `0`):
101
- an offset added to the inference steps. You can use a combination of `offset=1` and
102
- `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
103
- stable diffusion.
90
+ trained_betas (`np.ndarray`, *optional*):
91
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
92
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
93
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
94
+ `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
95
+ Video](https://imagen.research.google/video/paper.pdf) paper).
96
+ timestep_spacing (`str`, defaults to `"linspace"`):
97
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
98
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
99
+ steps_offset (`int`, defaults to 0):
100
+ An offset added to the inference steps. You can use a combination of `offset=1` and
101
+ `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
102
+ Diffusion.
104
103
  """
105
104
 
106
105
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -172,12 +171,18 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
172
171
  timestep: Union[float, torch.FloatTensor],
173
172
  ) -> torch.FloatTensor:
174
173
  """
175
- Args:
176
174
  Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
177
175
  current timestep.
178
- sample (`torch.FloatTensor`): input sample timestep (`int`, optional): current timestep
176
+
177
+ Args:
178
+ sample (`torch.FloatTensor`):
179
+ The input sample.
180
+ timestep (`int`, *optional*):
181
+ The current timestep in the diffusion chain.
182
+
179
183
  Returns:
180
- `torch.FloatTensor`: scaled input sample
184
+ `torch.FloatTensor`:
185
+ A scaled input sample.
181
186
  """
182
187
  step_index = self.index_for_timestep(timestep)
183
188
 
@@ -196,13 +201,13 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
196
201
  num_train_timesteps: Optional[int] = None,
197
202
  ):
198
203
  """
199
- Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
204
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
200
205
 
201
206
  Args:
202
207
  num_inference_steps (`int`):
203
- the number of diffusion steps used when generating samples with a pre-trained model.
204
- device (`str` or `torch.device`, optional):
205
- the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
208
+ The number of diffusion steps used when generating samples with a pre-trained model.
209
+ device (`str` or `torch.device`, *optional*):
210
+ The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
206
211
  """
207
212
  self.num_inference_steps = num_inference_steps
208
213
 
@@ -307,17 +312,25 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
307
312
  return_dict: bool = True,
308
313
  ) -> Union[SchedulerOutput, Tuple]:
309
314
  """
310
- Args:
311
- Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
315
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
312
316
  process from the learned model outputs (most often the predicted noise).
313
- model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. timestep
314
- (`int`): current discrete timestep in the diffusion chain. sample (`torch.FloatTensor` or `np.ndarray`):
315
- current instance of sample being created by diffusion process.
316
- return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
317
+
318
+ Args:
319
+ model_output (`torch.FloatTensor`):
320
+ The direct output from the learned diffusion model.
321
+ timestep (`float`):
322
+ The current discrete timestep in the diffusion chain.
323
+ sample (`torch.FloatTensor`):
324
+ A current instance of a sample created by the diffusion process.
325
+ generator (`torch.Generator`, *optional*):
326
+ A random number generator.
327
+ return_dict (`bool`):
328
+ Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
329
+
317
330
  Returns:
318
331
  [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
319
- [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When
320
- returning a tuple, the first element is the sample tensor.
332
+ If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
333
+ tuple is returned where the first element is the sample tensor.
321
334
  """
322
335
  step_index = self.index_for_timestep(timestep)
323
336