diffusers 0.30.3__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (172)
  1. diffusers/__init__.py +34 -2
  2. diffusers/configuration_utils.py +12 -0
  3. diffusers/dependency_versions_table.py +1 -1
  4. diffusers/image_processor.py +257 -54
  5. diffusers/loaders/__init__.py +2 -0
  6. diffusers/loaders/ip_adapter.py +5 -1
  7. diffusers/loaders/lora_base.py +14 -7
  8. diffusers/loaders/lora_conversion_utils.py +332 -0
  9. diffusers/loaders/lora_pipeline.py +707 -41
  10. diffusers/loaders/peft.py +1 -0
  11. diffusers/loaders/single_file_utils.py +81 -4
  12. diffusers/loaders/textual_inversion.py +2 -0
  13. diffusers/loaders/unet.py +39 -8
  14. diffusers/models/__init__.py +4 -0
  15. diffusers/models/adapter.py +53 -53
  16. diffusers/models/attention.py +86 -10
  17. diffusers/models/attention_processor.py +169 -133
  18. diffusers/models/autoencoders/autoencoder_kl.py +71 -11
  19. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +187 -88
  20. diffusers/models/controlnet_flux.py +536 -0
  21. diffusers/models/controlnet_sd3.py +7 -3
  22. diffusers/models/controlnet_sparsectrl.py +0 -1
  23. diffusers/models/embeddings.py +170 -61
  24. diffusers/models/embeddings_flax.py +23 -9
  25. diffusers/models/model_loading_utils.py +182 -14
  26. diffusers/models/modeling_utils.py +283 -46
  27. diffusers/models/normalization.py +79 -0
  28. diffusers/models/transformers/__init__.py +1 -0
  29. diffusers/models/transformers/auraflow_transformer_2d.py +1 -0
  30. diffusers/models/transformers/cogvideox_transformer_3d.py +23 -2
  31. diffusers/models/transformers/pixart_transformer_2d.py +9 -1
  32. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  33. diffusers/models/transformers/transformer_flux.py +161 -44
  34. diffusers/models/transformers/transformer_sd3.py +7 -1
  35. diffusers/models/unets/unet_2d_condition.py +8 -8
  36. diffusers/models/unets/unet_motion_model.py +41 -63
  37. diffusers/models/upsampling.py +6 -6
  38. diffusers/pipelines/__init__.py +35 -6
  39. diffusers/pipelines/animatediff/__init__.py +2 -0
  40. diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
  41. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +44 -20
  42. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
  43. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +2 -0
  44. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -66
  45. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  46. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -1
  47. diffusers/pipelines/auto_pipeline.py +39 -8
  48. diffusers/pipelines/cogvideo/__init__.py +2 -0
  49. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +30 -17
  50. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +794 -0
  51. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +41 -31
  52. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +42 -29
  53. diffusers/pipelines/cogview3/__init__.py +47 -0
  54. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  55. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  56. diffusers/pipelines/controlnet/pipeline_controlnet.py +9 -1
  57. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +8 -0
  58. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +8 -0
  59. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +36 -13
  60. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +9 -1
  61. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +8 -1
  62. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +17 -3
  63. diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
  64. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +3 -1
  65. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  66. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  67. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  68. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
  69. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
  70. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
  71. diffusers/pipelines/flux/__init__.py +10 -0
  72. diffusers/pipelines/flux/pipeline_flux.py +53 -20
  73. diffusers/pipelines/flux/pipeline_flux_controlnet.py +984 -0
  74. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +988 -0
  75. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1182 -0
  76. diffusers/pipelines/flux/pipeline_flux_img2img.py +850 -0
  77. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1015 -0
  78. diffusers/pipelines/free_noise_utils.py +365 -5
  79. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +15 -3
  80. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
  81. diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
  82. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
  83. diffusers/pipelines/kolors/tokenizer.py +4 -0
  84. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
  85. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
  86. diffusers/pipelines/latte/pipeline_latte.py +2 -2
  87. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
  88. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
  89. diffusers/pipelines/lumina/pipeline_lumina.py +2 -2
  90. diffusers/pipelines/pag/__init__.py +6 -0
  91. diffusers/pipelines/pag/pag_utils.py +8 -2
  92. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -1
  93. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1544 -0
  94. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +2 -2
  95. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1685 -0
  96. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +17 -5
  97. diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
  98. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +1 -1
  99. diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
  100. diffusers/pipelines/pag/pipeline_pag_sd_3.py +12 -3
  101. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
  102. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1091 -0
  103. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
  104. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
  105. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
  106. diffusers/pipelines/pia/pipeline_pia.py +2 -0
  107. diffusers/pipelines/pipeline_loading_utils.py +225 -27
  108. diffusers/pipelines/pipeline_utils.py +123 -180
  109. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
  110. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
  111. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
  112. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
  113. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +28 -6
  114. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
  115. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
  116. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
  117. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +12 -3
  118. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +20 -4
  119. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +3 -3
  120. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  121. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
  122. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
  123. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -4
  124. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -14
  125. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -14
  126. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
  127. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
  128. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
  129. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
  130. diffusers/quantizers/__init__.py +16 -0
  131. diffusers/quantizers/auto.py +126 -0
  132. diffusers/quantizers/base.py +233 -0
  133. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  134. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +558 -0
  135. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  136. diffusers/quantizers/quantization_config.py +391 -0
  137. diffusers/schedulers/scheduling_ddim.py +4 -1
  138. diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
  139. diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
  140. diffusers/schedulers/scheduling_ddpm.py +4 -1
  141. diffusers/schedulers/scheduling_ddpm_parallel.py +4 -1
  142. diffusers/schedulers/scheduling_deis_multistep.py +78 -1
  143. diffusers/schedulers/scheduling_dpmsolver_multistep.py +82 -1
  144. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +80 -1
  145. diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
  146. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +82 -1
  147. diffusers/schedulers/scheduling_edm_euler.py +8 -6
  148. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
  149. diffusers/schedulers/scheduling_euler_discrete.py +92 -7
  150. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
  151. diffusers/schedulers/scheduling_heun_discrete.py +114 -8
  152. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
  153. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
  154. diffusers/schedulers/scheduling_lms_discrete.py +76 -1
  155. diffusers/schedulers/scheduling_sasolver.py +78 -1
  156. diffusers/schedulers/scheduling_unclip.py +4 -1
  157. diffusers/schedulers/scheduling_unipc_multistep.py +78 -1
  158. diffusers/training_utils.py +48 -18
  159. diffusers/utils/__init__.py +2 -1
  160. diffusers/utils/dummy_pt_objects.py +60 -0
  161. diffusers/utils/dummy_torch_and_transformers_objects.py +165 -0
  162. diffusers/utils/hub_utils.py +16 -4
  163. diffusers/utils/import_utils.py +31 -8
  164. diffusers/utils/loading_utils.py +28 -4
  165. diffusers/utils/peft_utils.py +3 -3
  166. diffusers/utils/testing_utils.py +59 -0
  167. {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/METADATA +7 -6
  168. {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/RECORD +172 -149
  169. {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/LICENSE +0 -0
  170. {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/WHEEL +0 -0
  171. {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/entry_points.txt +0 -0
  172. {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/top_level.txt +0 -0
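
Beyond the scheduler changes excerpted below, the file list shows a new diffusers/quantizers package (auto.py, base.py, quantization_config.py and a bitsandbytes backend). A minimal sketch of how this is typically wired up, assuming the `BitsAndBytesConfig` name and the `quantization_config` argument from the 0.31.0 release documentation (they are not visible in this diff), with an illustrative model id:

    import torch
    from diffusers import BitsAndBytesConfig, FluxTransformer2DModel

    # 4-bit NF4 config from the new diffusers.quantizers module (assumed to be
    # re-exported at the top level, per the 0.31.0 docs).
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    transformer = FluxTransformer2DModel.from_pretrained(
        "black-forest-labs/FLUX.1-dev",  # illustrative model id
        subfolder="transformer",
        quantization_config=quant_config,
        torch_dtype=torch.bfloat16,
    )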
diffusers/schedulers/scheduling_k_dpm_2_discrete.py
@@ -13,13 +13,38 @@
  # limitations under the License.

  import math
+ from dataclasses import dataclass
  from typing import List, Optional, Tuple, Union

  import numpy as np
  import torch

  from ..configuration_utils import ConfigMixin, register_to_config
- from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput
+ from ..utils import BaseOutput, is_scipy_available
+ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+
+
+ if is_scipy_available():
+     import scipy.stats
+
+
+ @dataclass
+ # Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->KDPM2Discrete
+ class KDPM2DiscreteSchedulerOutput(BaseOutput):
+     """
+     Output class for the scheduler's `step` function output.
+
+     Args:
+         prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+             Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
+             denoising loop.
+         pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+             The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
+             `pred_original_sample` can be used to preview progress or for guidance.
+     """
+
+     prev_sample: torch.Tensor
+     pred_original_sample: Optional[torch.Tensor] = None


  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
@@ -90,6 +115,11 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
          use_karras_sigmas (`bool`, *optional*, defaults to `False`):
              Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
              the sigmas are determined according to a sequence of noise levels {σi}.
+         use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
+             Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
+         use_beta_sigmas (`bool`, *optional*, defaults to `False`):
+             Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
+             Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
          prediction_type (`str`, defaults to `epsilon`, *optional*):
              Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
              `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
@@ -113,10 +143,18 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
          beta_schedule: str = "linear",
          trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
          use_karras_sigmas: Optional[bool] = False,
+         use_exponential_sigmas: Optional[bool] = False,
+         use_beta_sigmas: Optional[bool] = False,
          prediction_type: str = "epsilon",
          timestep_spacing: str = "linspace",
          steps_offset: int = 0,
      ):
+         if self.config.use_beta_sigmas and not is_scipy_available():
+             raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
+         if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
+             raise ValueError(
+                 "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
+             )
          if trained_betas is not None:
              self.betas = torch.tensor(trained_betas, dtype=torch.float32)
          elif beta_schedule == "linear":
@@ -249,6 +287,12 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
          if self.config.use_karras_sigmas:
              sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
              timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
+         elif self.config.use_exponential_sigmas:
+             sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+         elif self.config.use_beta_sigmas:
+             sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])

          self.log_sigmas = torch.from_numpy(log_sigmas).to(device=device)
          sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
@@ -359,13 +403,67 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
          sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
          return sigmas

+     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
+     def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
+         """Constructs an exponential noise schedule."""
+
+         # Hack to make sure that other schedulers which copy this function don't break
+         # TODO: Add this logic to the other schedulers
+         if hasattr(self.config, "sigma_min"):
+             sigma_min = self.config.sigma_min
+         else:
+             sigma_min = None
+
+         if hasattr(self.config, "sigma_max"):
+             sigma_max = self.config.sigma_max
+         else:
+             sigma_max = None
+
+         sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+         sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+         sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
+         return sigmas
+
+     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta
+     def _convert_to_beta(
+         self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
+     ) -> torch.Tensor:
+         """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
+
+         # Hack to make sure that other schedulers which copy this function don't break
+         # TODO: Add this logic to the other schedulers
+         if hasattr(self.config, "sigma_min"):
+             sigma_min = self.config.sigma_min
+         else:
+             sigma_min = None
+
+         if hasattr(self.config, "sigma_max"):
+             sigma_max = self.config.sigma_max
+         else:
+             sigma_max = None
+
+         sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+         sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+         sigmas = torch.Tensor(
+             [
+                 sigma_min + (ppf * (sigma_max - sigma_min))
+                 for ppf in [
+                     scipy.stats.beta.ppf(timestep, alpha, beta)
+                     for timestep in 1 - np.linspace(0, 1, num_inference_steps)
+                 ]
+             ]
+         )
+         return sigmas
+
      def step(
          self,
          model_output: Union[torch.Tensor, np.ndarray],
          timestep: Union[float, torch.Tensor],
          sample: Union[torch.Tensor, np.ndarray],
          return_dict: bool = True,
-     ) -> Union[SchedulerOutput, Tuple]:
+     ) -> Union[KDPM2DiscreteSchedulerOutput, Tuple]:
          """
          Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
          process from the learned model outputs (most often the predicted noise).
@@ -378,12 +476,13 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
              sample (`torch.Tensor`):
                  A current instance of a sample created by the diffusion process.
              return_dict (`bool`):
-                 Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
+                 Whether or not to return a [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] or
+                 tuple.

          Returns:
-             [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
-                 If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
-                 tuple is returned where the first element is the sample tensor.
+             [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] or `tuple`:
+                 If return_dict is `True`, [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] is
+                 returned, otherwise a tuple is returned where the first element is the sample tensor.
          """
          if self.step_index is None:
              self._init_step_index(timestep)
@@ -445,9 +544,12 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
          prev_sample = sample + derivative * dt

          if not return_dict:
-             return (prev_sample,)
+             return (
+                 prev_sample,
+                 pred_original_sample,
+             )

-         return SchedulerOutput(prev_sample=prev_sample)
+         return KDPM2DiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)

      # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise
      def add_noise(
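
The hunks above give KDPM2DiscreteScheduler two new sigma schedules (`use_exponential_sigmas`, `use_beta_sigmas`) and switch its `step` output from the generic SchedulerOutput to KDPM2DiscreteSchedulerOutput, which also carries `pred_original_sample`. A minimal usage sketch based only on what the hunks show; the random tensors stand in for a real UNet prediction, not a full denoising loop:

    import torch
    from diffusers import KDPM2DiscreteScheduler

    # The three sigma options are mutually exclusive, and use_beta_sigmas needs scipy;
    # the new __init__ checks raise otherwise.
    scheduler = KDPM2DiscreteScheduler(use_beta_sigmas=True)
    scheduler.set_timesteps(num_inference_steps=25)

    sample = torch.randn(1, 4, 64, 64)        # placeholder latents
    model_output = torch.randn(1, 4, 64, 64)  # placeholder model prediction
    out = scheduler.step(model_output, scheduler.timesteps[0], sample)
    prev_sample = out.prev_sample
    pred_x0 = out.pred_original_sample        # new field in 0.31.0

With `return_dict=False` the returned tuple now has two elements, `(prev_sample, pred_original_sample)`, instead of one; the same pattern recurs in the scheduler diffs below.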
diffusers/schedulers/scheduling_lms_discrete.py
@@ -17,6 +17,7 @@ from dataclasses import dataclass
  from typing import List, Optional, Tuple, Union

  import numpy as np
+ import scipy.stats
  import torch
  from scipy import integrate

@@ -111,6 +112,11 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
          use_karras_sigmas (`bool`, *optional*, defaults to `False`):
              Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
              the sigmas are determined according to a sequence of noise levels {σi}.
+         use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
+             Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
+         use_beta_sigmas (`bool`, *optional*, defaults to `False`):
+             Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
+             Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
          prediction_type (`str`, defaults to `epsilon`, *optional*):
              Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
              `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
@@ -134,10 +140,16 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
          beta_schedule: str = "linear",
          trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
          use_karras_sigmas: Optional[bool] = False,
+         use_exponential_sigmas: Optional[bool] = False,
+         use_beta_sigmas: Optional[bool] = False,
          prediction_type: str = "epsilon",
          timestep_spacing: str = "linspace",
          steps_offset: int = 0,
      ):
+         if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
+             raise ValueError(
+                 "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
+             )
          if trained_betas is not None:
              self.betas = torch.tensor(trained_betas, dtype=torch.float32)
          elif beta_schedule == "linear":
@@ -289,6 +301,12 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
          if self.config.use_karras_sigmas:
              sigmas = self._convert_to_karras(in_sigmas=sigmas)
              timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+         elif self.config.use_exponential_sigmas:
+             sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+         elif self.config.use_beta_sigmas:
+             sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])

          sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)

@@ -362,6 +380,60 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
          sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
          return sigmas

+     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
+     def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
+         """Constructs an exponential noise schedule."""
+
+         # Hack to make sure that other schedulers which copy this function don't break
+         # TODO: Add this logic to the other schedulers
+         if hasattr(self.config, "sigma_min"):
+             sigma_min = self.config.sigma_min
+         else:
+             sigma_min = None
+
+         if hasattr(self.config, "sigma_max"):
+             sigma_max = self.config.sigma_max
+         else:
+             sigma_max = None
+
+         sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+         sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+         sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
+         return sigmas
+
+     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta
+     def _convert_to_beta(
+         self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
+     ) -> torch.Tensor:
+         """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
+
+         # Hack to make sure that other schedulers which copy this function don't break
+         # TODO: Add this logic to the other schedulers
+         if hasattr(self.config, "sigma_min"):
+             sigma_min = self.config.sigma_min
+         else:
+             sigma_min = None
+
+         if hasattr(self.config, "sigma_max"):
+             sigma_max = self.config.sigma_max
+         else:
+             sigma_max = None
+
+         sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+         sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+         sigmas = torch.Tensor(
+             [
+                 sigma_min + (ppf * (sigma_max - sigma_min))
+                 for ppf in [
+                     scipy.stats.beta.ppf(timestep, alpha, beta)
+                     for timestep in 1 - np.linspace(0, 1, num_inference_steps)
+                 ]
+             ]
+         )
+         return sigmas
+
      def step(
          self,
          model_output: torch.Tensor,
@@ -435,7 +507,10 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
          self._step_index += 1

          if not return_dict:
-             return (prev_sample,)
+             return (
+                 prev_sample,
+                 pred_original_sample,
+             )

          return LMSDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)

diffusers/schedulers/scheduling_sasolver.py
@@ -22,11 +22,15 @@ import numpy as np
  import torch

  from ..configuration_utils import ConfigMixin, register_to_config
- from ..utils import deprecate
+ from ..utils import deprecate, is_scipy_available
  from ..utils.torch_utils import randn_tensor
  from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput


+ if is_scipy_available():
+     import scipy.stats
+
+
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
  def betas_for_alpha_bar(
      num_diffusion_timesteps,
@@ -122,6 +126,11 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
          use_karras_sigmas (`bool`, *optional*, defaults to `False`):
              Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
              the sigmas are determined according to a sequence of noise levels {σi}.
+         use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
+             Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
+         use_beta_sigmas (`bool`, *optional*, defaults to `False`):
+             Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
+             Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
          lambda_min_clipped (`float`, defaults to `-inf`):
              Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
              cosine (`squaredcos_cap_v2`) noise schedule.
@@ -156,11 +165,19 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
          algorithm_type: str = "data_prediction",
          lower_order_final: bool = True,
          use_karras_sigmas: Optional[bool] = False,
+         use_exponential_sigmas: Optional[bool] = False,
+         use_beta_sigmas: Optional[bool] = False,
          lambda_min_clipped: float = -float("inf"),
          variance_type: Optional[str] = None,
          timestep_spacing: str = "linspace",
          steps_offset: int = 0,
      ):
+         if self.config.use_beta_sigmas and not is_scipy_available():
+             raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
+         if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
+             raise ValueError(
+                 "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
+             )
          if trained_betas is not None:
              self.betas = torch.tensor(trained_betas, dtype=torch.float32)
          elif beta_schedule == "linear":
@@ -284,6 +301,12 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
              sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
              timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
              sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
+         elif self.config.use_exponential_sigmas:
+             sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+         elif self.config.use_beta_sigmas:
+             sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
          else:
              sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
              sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
@@ -395,6 +418,60 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
          sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
          return sigmas

+     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
+     def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
+         """Constructs an exponential noise schedule."""
+
+         # Hack to make sure that other schedulers which copy this function don't break
+         # TODO: Add this logic to the other schedulers
+         if hasattr(self.config, "sigma_min"):
+             sigma_min = self.config.sigma_min
+         else:
+             sigma_min = None
+
+         if hasattr(self.config, "sigma_max"):
+             sigma_max = self.config.sigma_max
+         else:
+             sigma_max = None
+
+         sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+         sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+         sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
+         return sigmas
+
+     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta
+     def _convert_to_beta(
+         self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
+     ) -> torch.Tensor:
+         """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
+
+         # Hack to make sure that other schedulers which copy this function don't break
+         # TODO: Add this logic to the other schedulers
+         if hasattr(self.config, "sigma_min"):
+             sigma_min = self.config.sigma_min
+         else:
+             sigma_min = None
+
+         if hasattr(self.config, "sigma_max"):
+             sigma_max = self.config.sigma_max
+         else:
+             sigma_max = None
+
+         sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+         sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+         sigmas = torch.Tensor(
+             [
+                 sigma_min + (ppf * (sigma_max - sigma_min))
+                 for ppf in [
+                     scipy.stats.beta.ppf(timestep, alpha, beta)
+                     for timestep in 1 - np.linspace(0, 1, num_inference_steps)
+                 ]
+             ]
+         )
+         return sigmas
+
      def convert_model_output(
          self,
          model_output: torch.Tensor,
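
For reference, the two copied helpers reduce to simple spacing rules over [sigma_min, sigma_max]. A standalone sketch with illustrative values (not taken from any scheduler config):

    import math
    import numpy as np
    import scipy.stats
    import torch

    sigma_min, sigma_max, n = 0.03, 14.6, 10  # illustrative values

    # Exponential: uniform spacing in log-sigma from sigma_max down to sigma_min.
    exp_sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), n).exp()

    # Beta: Beta(0.6, 0.6) percent-point function mapped onto [sigma_min, sigma_max],
    # which concentrates steps near both ends of the noise range (arXiv:2407.12173).
    ppf = scipy.stats.beta.ppf(1 - np.linspace(0, 1, n), 0.6, 0.6)
    beta_sigmas = torch.tensor(sigma_min + ppf * (sigma_max - sigma_min))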
diffusers/schedulers/scheduling_unclip.py
@@ -320,7 +320,10 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
          pred_prev_sample = pred_prev_sample + variance

          if not return_dict:
-             return (pred_prev_sample,)
+             return (
+                 pred_prev_sample,
+                 pred_original_sample,
+             )

          return UnCLIPSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample)

diffusers/schedulers/scheduling_unipc_multistep.py
@@ -22,10 +22,14 @@ import numpy as np
  import torch

  from ..configuration_utils import ConfigMixin, register_to_config
- from ..utils import deprecate
+ from ..utils import deprecate, is_scipy_available
  from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput


+ if is_scipy_available():
+     import scipy.stats
+
+
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
  def betas_for_alpha_bar(
      num_diffusion_timesteps,
@@ -159,6 +163,11 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
          use_karras_sigmas (`bool`, *optional*, defaults to `False`):
              Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
              the sigmas are determined according to a sequence of noise levels {σi}.
+         use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
+             Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
+         use_beta_sigmas (`bool`, *optional*, defaults to `False`):
+             Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
+             Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
          timestep_spacing (`str`, defaults to `"linspace"`):
              The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
              Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
@@ -195,11 +204,19 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
          disable_corrector: List[int] = [],
          solver_p: SchedulerMixin = None,
          use_karras_sigmas: Optional[bool] = False,
+         use_exponential_sigmas: Optional[bool] = False,
+         use_beta_sigmas: Optional[bool] = False,
          timestep_spacing: str = "linspace",
          steps_offset: int = 0,
          final_sigmas_type: Optional[str] = "zero",  # "zero", "sigma_min"
          rescale_betas_zero_snr: bool = False,
      ):
+         if self.config.use_beta_sigmas and not is_scipy_available():
+             raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
+         if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
+             raise ValueError(
+                 "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
+             )
          if trained_betas is not None:
              self.betas = torch.tensor(trained_betas, dtype=torch.float32)
          elif beta_schedule == "linear":
@@ -329,6 +346,12 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
                      f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}"
                  )
              sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
+         elif self.config.use_exponential_sigmas:
+             sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+         elif self.config.use_beta_sigmas:
+             sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
+             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
          else:
              sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
              if self.config.final_sigmas_type == "sigma_min":
@@ -450,6 +473,60 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
          sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
          return sigmas

+     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
+     def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
+         """Constructs an exponential noise schedule."""
+
+         # Hack to make sure that other schedulers which copy this function don't break
+         # TODO: Add this logic to the other schedulers
+         if hasattr(self.config, "sigma_min"):
+             sigma_min = self.config.sigma_min
+         else:
+             sigma_min = None
+
+         if hasattr(self.config, "sigma_max"):
+             sigma_max = self.config.sigma_max
+         else:
+             sigma_max = None
+
+         sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+         sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+         sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
+         return sigmas
+
+     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta
+     def _convert_to_beta(
+         self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
+     ) -> torch.Tensor:
+         """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
+
+         # Hack to make sure that other schedulers which copy this function don't break
+         # TODO: Add this logic to the other schedulers
+         if hasattr(self.config, "sigma_min"):
+             sigma_min = self.config.sigma_min
+         else:
+             sigma_min = None
+
+         if hasattr(self.config, "sigma_max"):
+             sigma_max = self.config.sigma_max
+         else:
+             sigma_max = None
+
+         sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+         sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+         sigmas = torch.Tensor(
+             [
+                 sigma_min + (ppf * (sigma_max - sigma_min))
+                 for ppf in [
+                     scipy.stats.beta.ppf(timestep, alpha, beta)
+                     for timestep in 1 - np.linspace(0, 1, num_inference_steps)
+                 ]
+             ]
+         )
+         return sigmas
+
      def convert_model_output(
          self,
          model_output: torch.Tensor,
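
Because the new flags are registered to the scheduler config, they can also be switched on when swapping a pipeline's scheduler, since ConfigMixin.from_config accepts keyword overrides. A sketch of that pattern (model id illustrative; remember the three sigma flags are mutually exclusive):

    import torch
    from diffusers import StableDiffusionPipeline, UniPCMultistepScheduler

    pipe = StableDiffusionPipeline.from_pretrained(
        "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")

    # Reuse the existing scheduler config and flip on the exponential sigma schedule.
    pipe.scheduler = UniPCMultistepScheduler.from_config(
        pipe.scheduler.config, use_exponential_sigmas=True
    )
    image = pipe("an astronaut riding a horse", num_inference_steps=20).images[0]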