diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. diffusers/__init__.py +26 -1
  2. diffusers/configuration_utils.py +34 -29
  3. diffusers/dependency_versions_table.py +4 -0
  4. diffusers/image_processor.py +125 -12
  5. diffusers/loaders.py +169 -203
  6. diffusers/models/attention.py +24 -1
  7. diffusers/models/attention_flax.py +10 -5
  8. diffusers/models/attention_processor.py +3 -0
  9. diffusers/models/autoencoder_kl.py +114 -33
  10. diffusers/models/controlnet.py +131 -14
  11. diffusers/models/controlnet_flax.py +37 -26
  12. diffusers/models/cross_attention.py +17 -17
  13. diffusers/models/embeddings.py +67 -0
  14. diffusers/models/modeling_flax_utils.py +64 -56
  15. diffusers/models/modeling_utils.py +193 -104
  16. diffusers/models/prior_transformer.py +207 -37
  17. diffusers/models/resnet.py +26 -26
  18. diffusers/models/transformer_2d.py +36 -41
  19. diffusers/models/transformer_temporal.py +24 -21
  20. diffusers/models/unet_1d.py +31 -25
  21. diffusers/models/unet_2d.py +43 -30
  22. diffusers/models/unet_2d_blocks.py +210 -89
  23. diffusers/models/unet_2d_blocks_flax.py +12 -12
  24. diffusers/models/unet_2d_condition.py +172 -64
  25. diffusers/models/unet_2d_condition_flax.py +38 -24
  26. diffusers/models/unet_3d_blocks.py +34 -31
  27. diffusers/models/unet_3d_condition.py +101 -34
  28. diffusers/models/vae.py +5 -5
  29. diffusers/models/vae_flax.py +37 -34
  30. diffusers/models/vq_model.py +23 -14
  31. diffusers/pipelines/__init__.py +24 -1
  32. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
  33. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
  34. diffusers/pipelines/consistency_models/__init__.py +1 -0
  35. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
  36. diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
  37. diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
  38. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
  39. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
  40. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  41. diffusers/pipelines/kandinsky/__init__.py +1 -1
  42. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
  43. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
  44. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
  45. diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
  46. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
  47. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
  48. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
  49. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
  50. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
  51. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
  52. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
  53. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  54. diffusers/pipelines/pipeline_utils.py +124 -146
  55. diffusers/pipelines/shap_e/__init__.py +27 -0
  56. diffusers/pipelines/shap_e/camera.py +147 -0
  57. diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
  58. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
  59. diffusers/pipelines/shap_e/renderer.py +709 -0
  60. diffusers/pipelines/stable_diffusion/__init__.py +2 -0
  61. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
  65. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  66. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
  67. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  68. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
  69. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
  70. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
  71. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
  72. diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
  73. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
  74. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
  75. diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
  76. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
  77. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
  78. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
  79. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
  80. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  81. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
  82. diffusers/schedulers/__init__.py +3 -0
  83. diffusers/schedulers/scheduling_consistency_models.py +380 -0
  84. diffusers/schedulers/scheduling_ddim.py +28 -6
  85. diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
  86. diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
  87. diffusers/schedulers/scheduling_ddpm.py +53 -7
  88. diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
  89. diffusers/schedulers/scheduling_deis_multistep.py +66 -11
  90. diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
  91. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
  92. diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
  93. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
  94. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
  95. diffusers/schedulers/scheduling_euler_discrete.py +58 -8
  96. diffusers/schedulers/scheduling_heun_discrete.py +89 -14
  97. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
  98. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
  99. diffusers/schedulers/scheduling_lms_discrete.py +57 -8
  100. diffusers/schedulers/scheduling_pndm.py +46 -10
  101. diffusers/schedulers/scheduling_repaint.py +19 -4
  102. diffusers/schedulers/scheduling_sde_ve.py +5 -1
  103. diffusers/schedulers/scheduling_unclip.py +43 -4
  104. diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
  105. diffusers/training_utils.py +1 -1
  106. diffusers/utils/__init__.py +2 -1
  107. diffusers/utils/dummy_pt_objects.py +60 -0
  108. diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
  109. diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
  110. diffusers/utils/hub_utils.py +1 -1
  111. diffusers/utils/import_utils.py +20 -3
  112. diffusers/utils/logging.py +15 -18
  113. diffusers/utils/outputs.py +3 -3
  114. diffusers/utils/testing_utils.py +15 -0
  115. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
  116. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
  117. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
  118. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
  119. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
  120. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
@@ -26,7 +26,11 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
26
26
 
27
27
 
28
28
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
29
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
29
+ def betas_for_alpha_bar(
30
+ num_diffusion_timesteps,
31
+ max_beta=0.999,
32
+ alpha_transform_type="cosine",
33
+ ):
30
34
  """
31
35
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
32
36
  (1-beta) over time from t = [0,1].
@@ -39,19 +43,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
39
43
  num_diffusion_timesteps (`int`): the number of betas to produce.
40
44
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
41
45
  prevent singularities.
46
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
47
+ Choose from `cosine` or `exp`
42
48
 
43
49
  Returns:
44
50
  betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
45
51
  """
52
+ if alpha_transform_type == "cosine":
46
53
 
47
- def alpha_bar(time_step):
48
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
54
+ def alpha_bar_fn(t):
55
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
56
+
57
+ elif alpha_transform_type == "exp":
58
+
59
+ def alpha_bar_fn(t):
60
+ return math.exp(t * -12.0)
61
+
62
+ else:
63
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
49
64
 
50
65
  betas = []
51
66
  for i in range(num_diffusion_timesteps):
52
67
  t1 = i / num_diffusion_timesteps
53
68
  t2 = (i + 1) / num_diffusion_timesteps
54
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
69
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
55
70
  return torch.tensor(betas, dtype=torch.float32)
56
71
 
57
72
 
@@ -103,7 +118,17 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
103
118
  lower_order_final (`bool`, default `True`):
104
119
  whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. We empirically
105
120
  find this trick can stabilize the sampling of DEIS for steps < 15, especially for steps <= 10.
106
-
121
+ use_karras_sigmas (`bool`, *optional*, defaults to `False`):
122
+ This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
123
+ noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
124
+ of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
125
+ timestep_spacing (`str`, default `"linspace"`):
126
+ The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
127
+ Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
128
+ steps_offset (`int`, default `0`):
129
+ an offset added to the inference steps. You can use a combination of `steps_offset=1` and
130
+ `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
131
+ stable diffusion.
107
132
  """
108
133
 
109
134
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -125,6 +150,9 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
125
150
  algorithm_type: str = "deis",
126
151
  solver_type: str = "logrho",
127
152
  lower_order_final: bool = True,
153
+ use_karras_sigmas: Optional[bool] = False,
154
+ timestep_spacing: str = "linspace",
155
+ steps_offset: int = 0,
128
156
  ):
129
157
  if trained_betas is not None:
130
158
  self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -181,12 +209,39 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
181
209
  device (`str` or `torch.device`, optional):
182
210
  the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
183
211
  """
184
- timesteps = (
185
- np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
186
- .round()[::-1][:-1]
187
- .copy()
188
- .astype(np.int64)
189
- )
212
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
213
+ if self.config.timestep_spacing == "linspace":
214
+ timesteps = (
215
+ np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
216
+ .round()[::-1][:-1]
217
+ .copy()
218
+ .astype(np.int64)
219
+ )
220
+ elif self.config.timestep_spacing == "leading":
221
+ step_ratio = self.config.num_train_timesteps // (num_inference_steps + 1)
222
+ # creates integer timesteps by multiplying by ratio
223
+ # casting to int to avoid issues when num_inference_step is power of 3
224
+ timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64)
225
+ timesteps += self.config.steps_offset
226
+ elif self.config.timestep_spacing == "trailing":
227
+ step_ratio = self.config.num_train_timesteps / num_inference_steps
228
+ # creates integer timesteps by stepping down from num_train_timesteps in increments of step_ratio
229
+ # casting to int to avoid issues when num_inference_step is power of 3
230
+ timesteps = np.arange(self.config.num_train_timesteps, 0, -step_ratio).round().copy().astype(np.int64)
231
+ timesteps -= 1
232
+ else:
233
+ raise ValueError(
234
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
235
+ )
236
+
237
+ sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
238
+ if self.config.use_karras_sigmas:
239
+ log_sigmas = np.log(sigmas)
240
+ sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
241
+ timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
242
+ timesteps = np.flip(timesteps).copy().astype(np.int64)
243
+
244
+ self.sigmas = torch.from_numpy(sigmas)
190
245
 
191
246
  # when num_inference_steps == num_train_timesteps, we can end up with
192
247
  # duplicates in timesteps.
@@ -26,7 +26,11 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
26
26
 
27
27
 
28
28
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
29
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
29
+ def betas_for_alpha_bar(
30
+ num_diffusion_timesteps,
31
+ max_beta=0.999,
32
+ alpha_transform_type="cosine",
33
+ ):
30
34
  """
31
35
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
32
36
  (1-beta) over time from t = [0,1].
@@ -39,19 +43,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
39
43
  num_diffusion_timesteps (`int`): the number of betas to produce.
40
44
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
41
45
  prevent singularities.
46
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
47
+ Choose from `cosine` or `exp`
42
48
 
43
49
  Returns:
44
50
  betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
45
51
  """
52
+ if alpha_transform_type == "cosine":
46
53
 
47
- def alpha_bar(time_step):
48
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
54
+ def alpha_bar_fn(t):
55
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
56
+
57
+ elif alpha_transform_type == "exp":
58
+
59
+ def alpha_bar_fn(t):
60
+ return math.exp(t * -12.0)
61
+
62
+ else:
63
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
49
64
 
50
65
  betas = []
51
66
  for i in range(num_diffusion_timesteps):
52
67
  t1 = i / num_diffusion_timesteps
53
68
  t2 = (i + 1) / num_diffusion_timesteps
54
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
69
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
55
70
  return torch.tensor(betas, dtype=torch.float32)
56
71
 
57
72
 
@@ -134,6 +149,13 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
134
149
  guided-diffusion (https://github.com/openai/guided-diffusion) predicts both mean and variance of the
135
150
  Gaussian distribution in the model's output. DPM-Solver only needs the "mean" output because it is based on
136
151
  diffusion ODEs.
152
+ timestep_spacing (`str`, default `"linspace"`):
153
+ The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
154
+ Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
155
+ steps_offset (`int`, default `0`):
156
+ an offset added to the inference steps. You can use a combination of `steps_offset=1` and
157
+ `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
158
+ stable diffusion.
137
159
  """
138
160
 
139
161
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -158,6 +180,8 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
158
180
  use_karras_sigmas: Optional[bool] = False,
159
181
  lambda_min_clipped: float = -float("inf"),
160
182
  variance_type: Optional[str] = None,
183
+ timestep_spacing: str = "linspace",
184
+ steps_offset: int = 0,
161
185
  ):
162
186
  if trained_betas is not None:
163
187
  self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -203,7 +227,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
203
227
  self.timesteps = torch.from_numpy(timesteps)
204
228
  self.model_outputs = [None] * solver_order
205
229
  self.lower_order_nums = 0
206
- self.use_karras_sigmas = use_karras_sigmas
207
230
 
208
231
  def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torch.device] = None):
209
232
  """
@@ -218,20 +241,39 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
218
241
  # Clipping the minimum of all lambda(t) for numerical stability.
219
242
  # This is critical for cosine (squaredcos_cap_v2) noise schedule.
220
243
  clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped)
221
- timesteps = (
222
- np.linspace(0, self.config.num_train_timesteps - 1 - clipped_idx, num_inference_steps + 1)
223
- .round()[::-1][:-1]
224
- .copy()
225
- .astype(np.int64)
226
- )
244
+ last_timestep = ((self.config.num_train_timesteps - clipped_idx).numpy()).item()
245
+
246
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
247
+ if self.config.timestep_spacing == "linspace":
248
+ timesteps = (
249
+ np.linspace(0, last_timestep - 1, num_inference_steps + 1).round()[::-1][:-1].copy().astype(np.int64)
250
+ )
251
+ elif self.config.timestep_spacing == "leading":
252
+ step_ratio = last_timestep // (num_inference_steps + 1)
253
+ # creates integer timesteps by multiplying by ratio
254
+ # casting to int to avoid issues when num_inference_step is power of 3
255
+ timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64)
256
+ timesteps += self.config.steps_offset
257
+ elif self.config.timestep_spacing == "trailing":
258
+ step_ratio = self.config.num_train_timesteps / num_inference_steps
259
+ # creates integer timesteps by stepping down from last_timestep in increments of step_ratio
260
+ # casting to int to avoid issues when num_inference_step is power of 3
261
+ timesteps = np.arange(last_timestep, 0, -step_ratio).round().copy().astype(np.int64)
262
+ timesteps -= 1
263
+ else:
264
+ raise ValueError(
265
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
266
+ )
227
267
 
228
- if self.use_karras_sigmas:
229
- sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
268
+ sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
269
+ if self.config.use_karras_sigmas:
230
270
  log_sigmas = np.log(sigmas)
231
271
  sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
232
272
  timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
233
273
  timesteps = np.flip(timesteps).copy().astype(np.int64)
234
274
 
275
+ self.sigmas = torch.from_numpy(sigmas)
276
+
235
277
  # when num_inference_steps == num_train_timesteps, we can end up with
236
278
  # duplicates in timesteps.
237
279
  _, unique_indices = np.unique(timesteps, return_index=True)
@@ -26,7 +26,11 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
26
26
 
27
27
 
28
28
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
29
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
29
+ def betas_for_alpha_bar(
30
+ num_diffusion_timesteps,
31
+ max_beta=0.999,
32
+ alpha_transform_type="cosine",
33
+ ):
30
34
  """
31
35
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
32
36
  (1-beta) over time from t = [0,1].
@@ -39,19 +43,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
39
43
  num_diffusion_timesteps (`int`): the number of betas to produce.
40
44
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
41
45
  prevent singularities.
46
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
47
+ Choose from `cosine` or `exp`
42
48
 
43
49
  Returns:
44
50
  betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
45
51
  """
52
+ if alpha_transform_type == "cosine":
46
53
 
47
- def alpha_bar(time_step):
48
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
54
+ def alpha_bar_fn(t):
55
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
56
+
57
+ elif alpha_transform_type == "exp":
58
+
59
+ def alpha_bar_fn(t):
60
+ return math.exp(t * -12.0)
61
+
62
+ else:
63
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
49
64
 
50
65
  betas = []
51
66
  for i in range(num_diffusion_timesteps):
52
67
  t1 = i / num_diffusion_timesteps
53
68
  t2 = (i + 1) / num_diffusion_timesteps
54
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
69
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
55
70
  return torch.tensor(betas, dtype=torch.float32)
56
71
 
57
72
 
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import math
16
+ from collections import defaultdict
16
17
  from typing import List, Optional, Tuple, Union
17
18
 
18
19
  import numpy as np
@@ -76,7 +77,11 @@ class BrownianTreeNoiseSampler:
76
77
 
77
78
 
78
79
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
79
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor:
80
+ def betas_for_alpha_bar(
81
+ num_diffusion_timesteps,
82
+ max_beta=0.999,
83
+ alpha_transform_type="cosine",
84
+ ):
80
85
  """
81
86
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
82
87
  (1-beta) over time from t = [0,1].
@@ -89,19 +94,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor
89
94
  num_diffusion_timesteps (`int`): the number of betas to produce.
90
95
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
91
96
  prevent singularities.
97
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
98
+ Choose from `cosine` or `exp`
92
99
 
93
100
  Returns:
94
101
  betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
95
102
  """
103
+ if alpha_transform_type == "cosine":
96
104
 
97
- def alpha_bar(time_step):
98
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
105
+ def alpha_bar_fn(t):
106
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
107
+
108
+ elif alpha_transform_type == "exp":
109
+
110
+ def alpha_bar_fn(t):
111
+ return math.exp(t * -12.0)
112
+
113
+ else:
114
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
99
115
 
100
116
  betas = []
101
117
  for i in range(num_diffusion_timesteps):
102
118
  t1 = i / num_diffusion_timesteps
103
119
  t2 = (i + 1) / num_diffusion_timesteps
104
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
120
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
105
121
  return torch.tensor(betas, dtype=torch.float32)
106
122
 
107
123
 
@@ -133,6 +149,13 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
133
149
  of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
134
150
  noise_sampler_seed (`int`, *optional*, defaults to `None`):
135
151
  The random seed to use for the noise sampler. If `None`, a random seed will be generated.
152
+ timestep_spacing (`str`, default `"linspace"`):
153
+ The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
154
+ Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
155
+ steps_offset (`int`, default `0`):
156
+ an offset added to the inference steps. You can use a combination of `steps_offset=1` and
157
+ `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
158
+ stable diffusion.
136
159
  """
137
160
 
138
161
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -149,6 +172,8 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
149
172
  prediction_type: str = "epsilon",
150
173
  use_karras_sigmas: Optional[bool] = False,
151
174
  noise_sampler_seed: Optional[int] = None,
175
+ timestep_spacing: str = "linspace",
176
+ steps_offset: int = 0,
152
177
  ):
153
178
  if trained_betas is not None:
154
179
  self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -181,12 +206,26 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
181
206
 
182
207
  indices = (schedule_timesteps == timestep).nonzero()
183
208
 
184
- if self.state_in_first_order:
185
- pos = -1
209
+ # The sigma index that is taken for the **very** first `step`
210
+ # is always the second index (or the last index if there is only 1)
211
+ # This way we can ensure we don't accidentally skip a sigma in
212
+ # case we start in the middle of the denoising schedule (e.g. for image-to-image)
213
+ if len(self._index_counter) == 0:
214
+ pos = 1 if len(indices) > 1 else 0
186
215
  else:
187
- pos = 0
216
+ timestep_int = timestep.cpu().item() if torch.is_tensor(timestep) else timestep
217
+ pos = self._index_counter[timestep_int]
218
+
188
219
  return indices[pos].item()
189
220
 
221
+ @property
222
+ def init_noise_sigma(self):
223
+ # standard deviation of the initial noise distribution
224
+ if self.config.timestep_spacing in ["linspace", "trailing"]:
225
+ return self.sigmas.max()
226
+
227
+ return (self.sigmas.max() ** 2 + 1) ** 0.5
228
+
190
229
  def scale_model_input(
191
230
  self,
192
231
  sample: torch.FloatTensor,
@@ -226,7 +265,25 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
226
265
 
227
266
  num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps
228
267
 
229
- timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
268
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
269
+ if self.config.timestep_spacing == "linspace":
270
+ timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
271
+ elif self.config.timestep_spacing == "leading":
272
+ step_ratio = num_train_timesteps // self.num_inference_steps
273
+ # creates integer timesteps by multiplying by ratio
274
+ # casting to int to avoid issues when num_inference_step is power of 3
275
+ timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float)
276
+ timesteps += self.config.steps_offset
277
+ elif self.config.timestep_spacing == "trailing":
278
+ step_ratio = num_train_timesteps / self.num_inference_steps
279
+ # creates rounded timesteps by stepping down from num_train_timesteps in increments of step_ratio
280
+ # casting to int to avoid issues when num_inference_step is power of 3
281
+ timesteps = (np.arange(num_train_timesteps, 0, -step_ratio)).round().copy().astype(float)
282
+ timesteps -= 1
283
+ else:
284
+ raise ValueError(
285
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
286
+ )
230
287
 
231
288
  sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
232
289
  log_sigmas = np.log(sigmas)
@@ -242,9 +299,6 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
242
299
  sigmas = torch.from_numpy(sigmas).to(device=device)
243
300
  self.sigmas = torch.cat([sigmas[:1], sigmas[1:-1].repeat_interleave(2), sigmas[-1:]])
244
301
 
245
- # standard deviation of the initial noise distribution
246
- self.init_noise_sigma = self.sigmas.max()
247
-
248
302
  timesteps = torch.from_numpy(timesteps)
249
303
  second_order_timesteps = torch.from_numpy(second_order_timesteps)
250
304
  timesteps = torch.cat([timesteps[:1], timesteps[1:].repeat_interleave(2)])
@@ -260,6 +314,10 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
260
314
  self.sample = None
261
315
  self.mid_point_sigma = None
262
316
 
317
+ # for exp beta schedules, such as the one for `pipeline_shap_e.py`
318
+ # we need an index counter
319
+ self._index_counter = defaultdict(int)
320
+
263
321
  def _second_order_timesteps(self, sigmas, log_sigmas):
264
322
  def sigma_fn(_t):
265
323
  return np.exp(-_t)
@@ -341,6 +399,10 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
341
399
  """
342
400
  step_index = self.index_for_timestep(timestep)
343
401
 
402
+ # advance index counter by 1
403
+ timestep_int = timestep.cpu().item() if torch.is_tensor(timestep) else timestep
404
+ self._index_counter[timestep_int] += 1
405
+
344
406
  # Create a noise sampler if it hasn't been created yet
345
407
  if self.noise_sampler is None:
346
408
  min_sigma, max_sigma = self.sigmas[self.sigmas > 0].min(), self.sigmas.max()
@@ -29,7 +29,11 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
29
29
 
30
30
 
31
31
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
32
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
32
+ def betas_for_alpha_bar(
33
+ num_diffusion_timesteps,
34
+ max_beta=0.999,
35
+ alpha_transform_type="cosine",
36
+ ):
33
37
  """
34
38
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
35
39
  (1-beta) over time from t = [0,1].
@@ -42,19 +46,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
42
46
  num_diffusion_timesteps (`int`): the number of betas to produce.
43
47
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
44
48
  prevent singularities.
49
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
50
+ Choose from `cosine` or `exp`
45
51
 
46
52
  Returns:
47
53
  betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
48
54
  """
55
+ if alpha_transform_type == "cosine":
49
56
 
50
- def alpha_bar(time_step):
51
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
57
+ def alpha_bar_fn(t):
58
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
59
+
60
+ elif alpha_transform_type == "exp":
61
+
62
+ def alpha_bar_fn(t):
63
+ return math.exp(t * -12.0)
64
+
65
+ else:
66
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
52
67
 
53
68
  betas = []
54
69
  for i in range(num_diffusion_timesteps):
55
70
  t1 = i / num_diffusion_timesteps
56
71
  t2 = (i + 1) / num_diffusion_timesteps
57
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
72
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
58
73
  return torch.tensor(betas, dtype=torch.float32)
59
74
 
60
75
 
@@ -202,7 +217,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
202
217
  self.model_outputs = [None] * solver_order
203
218
  self.sample = None
204
219
  self.order_list = self.get_order_list(num_train_timesteps)
205
- self.use_karras_sigmas = use_karras_sigmas
206
220
 
207
221
  def get_order_list(self, num_inference_steps: int) -> List[int]:
208
222
  """
@@ -259,13 +273,15 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
259
273
  .astype(np.int64)
260
274
  )
261
275
 
262
- if self.use_karras_sigmas:
263
- sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
276
+ sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
277
+ if self.config.use_karras_sigmas:
264
278
  log_sigmas = np.log(sigmas)
265
279
  sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
266
280
  timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
267
281
  timesteps = np.flip(timesteps).copy().astype(np.int64)
268
282
 
283
+ self.sigmas = torch.from_numpy(sigmas)
284
+
269
285
  self.timesteps = torch.from_numpy(timesteps).to(device)
270
286
  self.model_outputs = [None] * self.config.solver_order
271
287
  self.sample = None
@@ -47,7 +47,11 @@ class EulerAncestralDiscreteSchedulerOutput(BaseOutput):
47
47
 
48
48
 
49
49
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
50
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor:
50
+ def betas_for_alpha_bar(
51
+ num_diffusion_timesteps,
52
+ max_beta=0.999,
53
+ alpha_transform_type="cosine",
54
+ ):
51
55
  """
52
56
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
53
57
  (1-beta) over time from t = [0,1].
@@ -60,19 +64,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor
60
64
  num_diffusion_timesteps (`int`): the number of betas to produce.
61
65
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
62
66
  prevent singularities.
67
+ alpha_transform_type (`str`, *optional*, defaults to `cosine`): the type of noise schedule for alpha_bar.
68
+ Choose from `cosine` or `exp`
63
69
 
64
70
  Returns:
65
71
  betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
66
72
  """
73
+ if alpha_transform_type == "cosine":
67
74
 
68
- def alpha_bar(time_step):
69
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
75
+ def alpha_bar_fn(t):
76
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
77
+
78
+ elif alpha_transform_type == "exp":
79
+
80
+ def alpha_bar_fn(t):
81
+ return math.exp(t * -12.0)
82
+
83
+ else:
84
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
70
85
 
71
86
  betas = []
72
87
  for i in range(num_diffusion_timesteps):
73
88
  t1 = i / num_diffusion_timesteps
74
89
  t2 = (i + 1) / num_diffusion_timesteps
75
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
90
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
76
91
  return torch.tensor(betas, dtype=torch.float32)
77
92
 
78
93
 
@@ -99,7 +114,13 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
99
114
  prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
100
115
  process), `sample` (directly predicting the noisy sample) or `v_prediction` (see section 2.4
101
116
  https://imagen.research.google/video/paper.pdf)
102
-
117
+ timestep_spacing (`str`, default `"linspace"`):
118
+ The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
119
+ Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
120
+ steps_offset (`int`, default `0`):
121
+ an offset added to the inference steps. You can use a combination of `steps_offset=1` and
122
+ `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
123
+ stable diffusion.
103
124
  """
104
125
 
105
126
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -114,6 +135,8 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
114
135
  beta_schedule: str = "linear",
115
136
  trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
116
137
  prediction_type: str = "epsilon",
138
+ timestep_spacing: str = "linspace",
139
+ steps_offset: int = 0,
117
140
  ):
118
141
  if trained_betas is not None:
119
142
  self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -137,15 +160,20 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
137
160
  sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
138
161
  self.sigmas = torch.from_numpy(sigmas)
139
162
 
140
- # standard deviation of the initial noise distribution
141
- self.init_noise_sigma = self.sigmas.max()
142
-
143
163
  # setable values
144
164
  self.num_inference_steps = None
145
165
  timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=float)[::-1].copy()
146
166
  self.timesteps = torch.from_numpy(timesteps)
147
167
  self.is_scale_input_called = False
148
168
 
169
+ @property
170
+ def init_noise_sigma(self):
171
+ # standard deviation of the initial noise distribution
172
+ if self.config.timestep_spacing in ["linspace", "trailing"]:
173
+ return self.sigmas.max()
174
+
175
+ return (self.sigmas.max() ** 2 + 1) ** 0.5
176
+
149
177
  def scale_model_input(
150
178
  self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
151
179
  ) -> torch.FloatTensor:
@@ -179,7 +207,28 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
179
207
  """
180
208
  self.num_inference_steps = num_inference_steps
181
209
 
182
- timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
210
+ # "linspace", "leading", "trailing" correspond to the annotations in Table 2 of https://arxiv.org/abs/2305.08891
211
+ if self.config.timestep_spacing == "linspace":
212
+ timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[
213
+ ::-1
214
+ ].copy()
215
+ elif self.config.timestep_spacing == "leading":
216
+ step_ratio = self.config.num_train_timesteps // self.num_inference_steps
217
+ # creates integer timesteps by multiplying by ratio
218
+ # rounding, then casting to float, to avoid issues when num_inference_steps is a power of 3
219
+ timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float)
220
+ timesteps += self.config.steps_offset
221
+ elif self.config.timestep_spacing == "trailing":
222
+ step_ratio = self.config.num_train_timesteps / self.num_inference_steps
223
+ # creates integer timesteps by multiplying by ratio
224
+ # rounding, then casting to float, to avoid issues when num_inference_steps is a power of 3
225
+ timesteps = (np.arange(self.config.num_train_timesteps, 0, -step_ratio)).round().copy().astype(float)
226
+ timesteps -= 1
227
+ else:
228
+ raise ValueError(
229
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
230
+ )
231
+
183
232
  sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
184
233
  sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
185
234
  sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)