diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

Files changed (238)
  1. diffusers/__init__.py +26 -2
  2. diffusers/commands/fp16_safetensors.py +10 -11
  3. diffusers/configuration_utils.py +13 -8
  4. diffusers/dependency_versions_check.py +0 -1
  5. diffusers/dependency_versions_table.py +5 -5
  6. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  7. diffusers/image_processor.py +463 -51
  8. diffusers/loaders/__init__.py +82 -0
  9. diffusers/loaders/ip_adapter.py +159 -0
  10. diffusers/loaders/lora.py +1553 -0
  11. diffusers/loaders/lora_conversion_utils.py +284 -0
  12. diffusers/loaders/single_file.py +637 -0
  13. diffusers/loaders/textual_inversion.py +455 -0
  14. diffusers/loaders/unet.py +828 -0
  15. diffusers/loaders/utils.py +59 -0
  16. diffusers/models/__init__.py +26 -9
  17. diffusers/models/activations.py +9 -6
  18. diffusers/models/attention.py +301 -29
  19. diffusers/models/attention_flax.py +9 -1
  20. diffusers/models/attention_processor.py +378 -6
  21. diffusers/models/autoencoders/__init__.py +5 -0
  22. diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +17 -12
  23. diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +47 -23
  24. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +402 -0
  25. diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +24 -28
  26. diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +51 -44
  27. diffusers/models/{vae.py → autoencoders/vae.py} +71 -17
  28. diffusers/models/controlnet.py +59 -39
  29. diffusers/models/controlnet_flax.py +19 -18
  30. diffusers/models/downsampling.py +338 -0
  31. diffusers/models/embeddings.py +112 -29
  32. diffusers/models/embeddings_flax.py +2 -0
  33. diffusers/models/lora.py +131 -1
  34. diffusers/models/modeling_flax_utils.py +14 -8
  35. diffusers/models/modeling_outputs.py +17 -0
  36. diffusers/models/modeling_utils.py +37 -29
  37. diffusers/models/normalization.py +110 -4
  38. diffusers/models/resnet.py +299 -652
  39. diffusers/models/transformer_2d.py +22 -5
  40. diffusers/models/transformer_temporal.py +183 -1
  41. diffusers/models/unet_2d_blocks_flax.py +5 -0
  42. diffusers/models/unet_2d_condition.py +46 -0
  43. diffusers/models/unet_2d_condition_flax.py +13 -13
  44. diffusers/models/unet_3d_blocks.py +957 -173
  45. diffusers/models/unet_3d_condition.py +16 -8
  46. diffusers/models/unet_kandinsky3.py +535 -0
  47. diffusers/models/unet_motion_model.py +48 -33
  48. diffusers/models/unet_spatio_temporal_condition.py +489 -0
  49. diffusers/models/upsampling.py +454 -0
  50. diffusers/models/uvit_2d.py +471 -0
  51. diffusers/models/vae_flax.py +7 -0
  52. diffusers/models/vq_model.py +12 -3
  53. diffusers/optimization.py +16 -9
  54. diffusers/pipelines/__init__.py +137 -76
  55. diffusers/pipelines/amused/__init__.py +62 -0
  56. diffusers/pipelines/amused/pipeline_amused.py +328 -0
  57. diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
  58. diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
  59. diffusers/pipelines/animatediff/pipeline_animatediff.py +66 -8
  60. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -0
  61. diffusers/pipelines/auto_pipeline.py +23 -13
  62. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -0
  63. diffusers/pipelines/controlnet/pipeline_controlnet.py +238 -35
  64. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +148 -37
  65. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +155 -41
  66. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +123 -43
  67. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +216 -39
  68. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +106 -34
  69. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +1 -0
  70. diffusers/pipelines/ddim/pipeline_ddim.py +1 -0
  71. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -0
  72. diffusers/pipelines/deepfloyd_if/pipeline_if.py +13 -1
  73. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +13 -1
  74. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +13 -1
  75. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +13 -1
  76. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +13 -1
  77. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +13 -1
  78. diffusers/pipelines/deprecated/__init__.py +153 -0
  79. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
  80. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +177 -34
  81. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +182 -37
  82. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
  83. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
  84. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
  85. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
  86. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
  87. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
  88. diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
  89. diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
  90. diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
  91. diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
  92. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
  93. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +5 -4
  94. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
  95. diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
  96. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
  97. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
  98. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +8 -7
  99. diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
  100. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +34 -13
  101. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +7 -6
  102. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +12 -11
  103. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +17 -11
  104. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +11 -10
  105. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +14 -13
  106. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
  107. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
  108. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
  109. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +83 -51
  110. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
  111. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +7 -6
  112. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +7 -6
  113. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +7 -6
  114. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
  115. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
  116. diffusers/pipelines/dit/pipeline_dit.py +1 -0
  117. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +1 -1
  118. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +3 -3
  119. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  120. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +1 -1
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +1 -1
  122. diffusers/pipelines/kandinsky3/__init__.py +49 -0
  123. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
  124. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +589 -0
  125. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +654 -0
  126. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +111 -11
  127. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +102 -9
  128. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -0
  129. diffusers/pipelines/musicldm/pipeline_musicldm.py +1 -1
  130. diffusers/pipelines/onnx_utils.py +8 -5
  131. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +7 -2
  132. diffusers/pipelines/pipeline_flax_utils.py +11 -8
  133. diffusers/pipelines/pipeline_utils.py +63 -42
  134. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +247 -38
  135. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
  136. diffusers/pipelines/stable_diffusion/__init__.py +37 -65
  137. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +75 -78
  138. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +2 -2
  139. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +2 -4
  140. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -0
  141. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +174 -11
  142. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +8 -3
  143. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +1 -0
  144. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +178 -11
  145. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +224 -13
  146. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +74 -20
  147. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -0
  148. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +7 -0
  149. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
  150. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
  151. diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
  152. diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +6 -2
  153. diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
  154. diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +3 -3
  155. diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
  156. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +3 -2
  157. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +4 -3
  158. diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
  159. diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +7 -1
  160. diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
  161. diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +51 -7
  162. diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
  163. diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +57 -8
  164. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
  165. diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
  166. diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +68 -10
  167. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +194 -17
  168. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +205 -16
  169. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +206 -17
  170. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +23 -17
  171. diffusers/pipelines/stable_video_diffusion/__init__.py +58 -0
  172. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +652 -0
  173. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +108 -12
  174. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +115 -14
  175. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -0
  176. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +6 -0
  177. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +23 -3
  178. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +334 -10
  179. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +1331 -0
  180. diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
  181. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
  182. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  183. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +14 -4
  184. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +9 -5
  185. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +1 -1
  186. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +2 -2
  187. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -1
  188. diffusers/schedulers/__init__.py +4 -4
  189. diffusers/schedulers/deprecated/__init__.py +50 -0
  190. diffusers/schedulers/{scheduling_karras_ve.py → deprecated/scheduling_karras_ve.py} +4 -4
  191. diffusers/schedulers/{scheduling_sde_vp.py → deprecated/scheduling_sde_vp.py} +4 -6
  192. diffusers/schedulers/scheduling_amused.py +162 -0
  193. diffusers/schedulers/scheduling_consistency_models.py +2 -0
  194. diffusers/schedulers/scheduling_ddim.py +1 -3
  195. diffusers/schedulers/scheduling_ddim_inverse.py +2 -7
  196. diffusers/schedulers/scheduling_ddim_parallel.py +1 -3
  197. diffusers/schedulers/scheduling_ddpm.py +47 -3
  198. diffusers/schedulers/scheduling_ddpm_parallel.py +47 -3
  199. diffusers/schedulers/scheduling_deis_multistep.py +28 -6
  200. diffusers/schedulers/scheduling_dpmsolver_multistep.py +28 -6
  201. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +28 -6
  202. diffusers/schedulers/scheduling_dpmsolver_sde.py +3 -3
  203. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +28 -6
  204. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +59 -3
  205. diffusers/schedulers/scheduling_euler_discrete.py +102 -16
  206. diffusers/schedulers/scheduling_heun_discrete.py +17 -5
  207. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +17 -5
  208. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +17 -5
  209. diffusers/schedulers/scheduling_lcm.py +123 -29
  210. diffusers/schedulers/scheduling_lms_discrete.py +3 -3
  211. diffusers/schedulers/scheduling_pndm.py +1 -3
  212. diffusers/schedulers/scheduling_repaint.py +1 -3
  213. diffusers/schedulers/scheduling_unipc_multistep.py +28 -6
  214. diffusers/schedulers/scheduling_utils.py +3 -1
  215. diffusers/schedulers/scheduling_utils_flax.py +3 -1
  216. diffusers/training_utils.py +1 -1
  217. diffusers/utils/__init__.py +1 -2
  218. diffusers/utils/constants.py +10 -12
  219. diffusers/utils/dummy_pt_objects.py +75 -0
  220. diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
  221. diffusers/utils/dynamic_modules_utils.py +18 -22
  222. diffusers/utils/export_utils.py +8 -3
  223. diffusers/utils/hub_utils.py +24 -36
  224. diffusers/utils/logging.py +11 -11
  225. diffusers/utils/outputs.py +5 -5
  226. diffusers/utils/peft_utils.py +88 -44
  227. diffusers/utils/state_dict_utils.py +8 -0
  228. diffusers/utils/testing_utils.py +199 -1
  229. diffusers/utils/torch_utils.py +4 -4
  230. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/METADATA +86 -69
  231. diffusers-0.25.0.dist-info/RECORD +360 -0
  232. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
  233. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
  234. diffusers/loaders.py +0 -3336
  235. diffusers-0.23.1.dist-info/RECORD +0 -323
  236. /diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
  237. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
  238. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
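The headline structural change is the removal of the 3,336-line monolithic diffusers/loaders.py in favor of a diffusers/loaders package, with legacy pipelines and schedulers relocated under deprecated/ subpackages. A quick smoke test of the new layout (a sketch assuming diffusers 0.25.0 is installed; the mixin and pipeline names are assumed to be the ones re-exported by the new __init__ files listed above):

    # the loaders package replaces the old flat diffusers/loaders.py module
    from diffusers.loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin

    # relocated pipelines remain importable from their deprecated/ homes, e.g.:
    from diffusers.pipelines.deprecated.alt_diffusion import AltDiffusionPipeline

The scheduler hunks reproduced below fall into four recurring patterns: zero-terminal-SNR beta rescaling, sigmas precomputed at init, config-aware bounds for the Karras sigma schedule, and duplicate-tolerant timestep indexing in add_noise.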
diffusers/schedulers/scheduling_ddpm_parallel.py

@@ -91,6 +91,43 @@ def betas_for_alpha_bar(
     return torch.tensor(betas, dtype=torch.float32)
 
 
+# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
+def rescale_zero_terminal_snr(betas):
+    """
+    Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+
+
+    Args:
+        betas (`torch.FloatTensor`):
+            the betas that the scheduler is being initialized with.
+
+    Returns:
+        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+    """
+    # Convert betas to alphas_bar_sqrt
+    alphas = 1.0 - betas
+    alphas_cumprod = torch.cumprod(alphas, dim=0)
+    alphas_bar_sqrt = alphas_cumprod.sqrt()
+
+    # Store old values.
+    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+
+    # Shift so the last timestep is zero.
+    alphas_bar_sqrt -= alphas_bar_sqrt_T
+
+    # Scale so the first timestep is back to the old value.
+    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+
+    # Convert alphas_bar_sqrt to betas
+    alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
+    alphas = alphas_bar[1:] / alphas_bar[:-1]  # Revert cumprod
+    alphas = torch.cat([alphas_bar[0:1], alphas])
+    betas = 1 - alphas
+
+    return betas
+
+
 class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
     """
     Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and
@@ -139,6 +176,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
             an offset added to the inference steps. You can use a combination of `offset=1` and
             `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
             stable diffusion.
+        rescale_betas_zero_snr (`bool`, defaults to `False`):
+            Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+            dark samples instead of limiting it to samples with medium brightness. Loosely related to
+            [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
     """
 
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -163,6 +204,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         sample_max_value: float = 1.0,
         timestep_spacing: str = "leading",
         steps_offset: int = 0,
+        rescale_betas_zero_snr: int = False,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -170,9 +212,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -183,6 +223,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         else:
             raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
 
+        # Rescale for zero SNR
+        if rescale_betas_zero_snr:
+            self.betas = rescale_zero_terminal_snr(self.betas)
+
         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
         self.one = torch.tensor(1.0)
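The effect of the new flag is easy to check numerically (a minimal sketch, assuming torch plus diffusers 0.25.0, where the module-level function added above becomes importable):

    import torch
    from diffusers.schedulers.scheduling_ddpm_parallel import rescale_zero_terminal_snr

    betas = torch.linspace(1e-4, 0.02, 1000)  # standard linear schedule
    alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)
    print(alphas_cumprod[-1].item())  # ~4e-5, i.e. terminal SNR = abar/(1 - abar) > 0

    betas_zsnr = rescale_zero_terminal_snr(betas)
    alphas_cumprod_zsnr = torch.cumprod(1.0 - betas_zsnr, dim=0)
    print(alphas_cumprod_zsnr[-1].item())  # 0.0: the final timestep is pure noise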
diffusers/schedulers/scheduling_deis_multistep.py

@@ -149,9 +149,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -164,6 +162,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         self.alpha_t = torch.sqrt(self.alphas_cumprod)
         self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
         self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+        self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
 
         # standard deviation of the initial noise distribution
         self.init_noise_sigma = 1.0
@@ -188,6 +187,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         self.model_outputs = [None] * solver_order
         self.lower_order_nums = 0
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     @property
     def step_index(self):
@@ -255,6 +255,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
 
         # add an index counter for schedulers that allow duplicated timesteps
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
@@ -325,8 +326,20 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
     def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
        """Constructs the noise schedule of Karras et al. (2022)."""
 
-        sigma_min: float = in_sigmas[-1].item()
-        sigma_max: float = in_sigmas[0].item()
+        # Hack to make sure that other schedulers which copy this function don't break
+        # TODO: Add this logic to the other schedulers
+        if hasattr(self.config, "sigma_min"):
+            sigma_min = self.config.sigma_min
+        else:
+            sigma_min = None
+
+        if hasattr(self.config, "sigma_max"):
+            sigma_max = self.config.sigma_max
+        else:
+            sigma_max = None
+
+        sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+        sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
 
         rho = 7.0  # 7.0 is the value used in the paper
         ramp = np.linspace(0, 1, num_inference_steps)
@@ -723,7 +736,16 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         schedule_timesteps = self.timesteps.to(original_samples.device)
         timesteps = timesteps.to(original_samples.device)
 
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        step_indices = []
+        for timestep in timesteps:
+            index_candidates = (schedule_timesteps == timestep).nonzero()
+            if len(index_candidates) == 0:
+                step_index = len(schedule_timesteps) - 1
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
+            step_indices.append(step_index)
 
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):
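The rewritten step_indices loop makes add_noise tolerant of timesteps that are missing from, or duplicated in, the current schedule. The three branches in miniature (a sketch in plain torch with a hypothetical schedule; no scheduler instance needed):

    import torch

    schedule_timesteps = torch.tensor([999, 999, 751, 500, 250])  # note the duplicated 999

    for t in (999, 500, 123):
        index_candidates = (schedule_timesteps == t).nonzero()
        if len(index_candidates) == 0:
            step_index = len(schedule_timesteps) - 1  # unknown timestep: clamp to last index
        elif len(index_candidates) > 1:
            step_index = index_candidates[1].item()  # duplicate: take the second occurrence
        else:
            step_index = index_candidates[0].item()
        print(t, "->", step_index)  # 999 -> 1, 500 -> 3, 123 -> 4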
diffusers/schedulers/scheduling_dpmsolver_multistep.py

@@ -176,9 +176,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -191,6 +189,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         self.alpha_t = torch.sqrt(self.alphas_cumprod)
         self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
         self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+        self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
 
         # standard deviation of the initial noise distribution
         self.init_noise_sigma = 1.0
@@ -215,6 +214,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         self.model_outputs = [None] * solver_order
         self.lower_order_nums = 0
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     @property
     def step_index(self):
@@ -291,6 +291,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
 
         # add an index counter for schedulers that allow duplicated timesteps
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
@@ -360,8 +361,20 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
     def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
        """Constructs the noise schedule of Karras et al. (2022)."""
 
-        sigma_min: float = in_sigmas[-1].item()
-        sigma_max: float = in_sigmas[0].item()
+        # Hack to make sure that other schedulers which copy this function don't break
+        # TODO: Add this logic to the other schedulers
+        if hasattr(self.config, "sigma_min"):
+            sigma_min = self.config.sigma_min
+        else:
+            sigma_min = None
+
+        if hasattr(self.config, "sigma_max"):
+            sigma_max = self.config.sigma_max
+        else:
+            sigma_max = None
+
+        sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+        sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
 
         rho = 7.0  # 7.0 is the value used in the paper
         ramp = np.linspace(0, 1, num_inference_steps)
@@ -885,7 +898,16 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         schedule_timesteps = self.timesteps.to(original_samples.device)
         timesteps = timesteps.to(original_samples.device)
 
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        step_indices = []
+        for timestep in timesteps:
+            index_candidates = (schedule_timesteps == timestep).nonzero()
+            if len(index_candidates) == 0:
+                step_index = len(schedule_timesteps) - 1
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
+            step_indices.append(step_index)
 
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):
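The sigma_min/sigma_max change lets a scheduler whose config defines those keys override the bounds previously always read off in_sigmas. The remainder of _convert_to_karras is unchanged and therefore not shown in the hunk; it interpolates in sigma**(1/rho) space as in Karras et al. (2022), roughly as follows (a sketch with illustrative bounds):

    import numpy as np

    sigma_min, sigma_max, rho = 0.0292, 14.6146, 7.0  # illustrative SD-like bounds
    ramp = np.linspace(0, 1, 10)  # 10 inference steps
    min_inv_rho = sigma_min ** (1 / rho)
    max_inv_rho = sigma_max ** (1 / rho)
    sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
    # decreases from sigma_max to sigma_min, concentrating steps at low noise levels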
diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py

@@ -171,9 +171,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -186,6 +184,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
         self.alpha_t = torch.sqrt(self.alphas_cumprod)
         self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
         self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+        self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
 
         # standard deviation of the initial noise distribution
         self.init_noise_sigma = 1.0
@@ -210,6 +209,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
         self.model_outputs = [None] * solver_order
         self.lower_order_nums = 0
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
         self.use_karras_sigmas = use_karras_sigmas
 
     @property
@@ -290,6 +290,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
 
         # add an index counter for schedulers that allow duplicated timesteps
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
@@ -360,8 +361,20 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
     def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
        """Constructs the noise schedule of Karras et al. (2022)."""
 
-        sigma_min: float = in_sigmas[-1].item()
-        sigma_max: float = in_sigmas[0].item()
+        # Hack to make sure that other schedulers which copy this function don't break
+        # TODO: Add this logic to the other schedulers
+        if hasattr(self.config, "sigma_min"):
+            sigma_min = self.config.sigma_min
+        else:
+            sigma_min = None
+
+        if hasattr(self.config, "sigma_max"):
+            sigma_max = self.config.sigma_max
+        else:
+            sigma_max = None
+
+        sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+        sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
 
         rho = 7.0  # 7.0 is the value used in the paper
         ramp = np.linspace(0, 1, num_inference_steps)
@@ -880,7 +893,16 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
         schedule_timesteps = self.timesteps.to(original_samples.device)
         timesteps = timesteps.to(original_samples.device)
 
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        step_indices = []
+        for timestep in timesteps:
+            index_candidates = (schedule_timesteps == timestep).nonzero()
+            if len(index_candidates) == 0:
+                step_index = len(schedule_timesteps) - 1
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
+            step_indices.append(step_index)
 
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):
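All four multistep/singlestep solvers in this release (DEIS, DPMSolverMultistep, its inverse, and DPMSolverSinglestep) now precompute self.sigmas from the cumulative alphas at init, mirroring the alpha_t/sigma_t/lambda_t terms already stored. The identity being precomputed, as a worked line (plain torch):

    import torch

    alphas_cumprod = torch.cumprod(1.0 - torch.linspace(1e-4, 0.02, 1000), dim=0)
    sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
    # equivalent to sigma_t / alpha_t, with alpha_t = sqrt(abar_t) and sigma_t = sqrt(1 - abar_t)
    assert torch.allclose(sigmas, torch.sqrt(1 - alphas_cumprod) / torch.sqrt(alphas_cumprod))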
diffusers/schedulers/scheduling_dpmsolver_sde.py

@@ -182,9 +182,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -200,6 +198,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
         self.noise_sampler = None
         self.noise_sampler_seed = noise_sampler_seed
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     # Copied from diffusers.schedulers.scheduling_heun_discrete.HeunDiscreteScheduler.index_for_timestep
     def index_for_timestep(self, timestep, schedule_timesteps=None):
@@ -349,6 +348,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
         self.mid_point_sigma = None
 
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
         self.noise_sampler = None
 
         # for exp beta schedules, such as the one for `pipeline_shap_e.py`
diffusers/schedulers/scheduling_dpmsolver_singlestep.py

@@ -159,9 +159,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -174,6 +172,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
         self.alpha_t = torch.sqrt(self.alphas_cumprod)
         self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
         self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+        self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
 
         # standard deviation of the initial noise distribution
         self.init_noise_sigma = 1.0
@@ -198,6 +197,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
         self.sample = None
         self.order_list = self.get_order_list(num_train_timesteps)
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     def get_order_list(self, num_inference_steps: int) -> List[int]:
        """
@@ -289,6 +289,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
 
         # add an index counter for schedulers that allow duplicated timesteps
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
@@ -359,8 +360,20 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
     def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
        """Constructs the noise schedule of Karras et al. (2022)."""
 
-        sigma_min: float = in_sigmas[-1].item()
-        sigma_max: float = in_sigmas[0].item()
+        # Hack to make sure that other schedulers which copy this function don't break
+        # TODO: Add this logic to the other schedulers
+        if hasattr(self.config, "sigma_min"):
+            sigma_min = self.config.sigma_min
+        else:
+            sigma_min = None
+
+        if hasattr(self.config, "sigma_max"):
+            sigma_max = self.config.sigma_max
+        else:
+            sigma_max = None
+
+        sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+        sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
 
         rho = 7.0  # 7.0 is the value used in the paper
         ramp = np.linspace(0, 1, num_inference_steps)
@@ -886,7 +899,16 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
         schedule_timesteps = self.timesteps.to(original_samples.device)
         timesteps = timesteps.to(original_samples.device)
 
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        step_indices = []
+        for timestep in timesteps:
+            index_candidates = (schedule_timesteps == timestep).nonzero()
+            if len(index_candidates) == 0:
+                step_index = len(schedule_timesteps) - 1
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
+            step_indices.append(step_index)
 
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):
diffusers/schedulers/scheduling_euler_ancestral_discrete.py

@@ -92,6 +92,43 @@ def betas_for_alpha_bar(
     return torch.tensor(betas, dtype=torch.float32)
 
 
+# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
+def rescale_zero_terminal_snr(betas):
+    """
+    Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+
+
+    Args:
+        betas (`torch.FloatTensor`):
+            the betas that the scheduler is being initialized with.
+
+    Returns:
+        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+    """
+    # Convert betas to alphas_bar_sqrt
+    alphas = 1.0 - betas
+    alphas_cumprod = torch.cumprod(alphas, dim=0)
+    alphas_bar_sqrt = alphas_cumprod.sqrt()
+
+    # Store old values.
+    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+
+    # Shift so the last timestep is zero.
+    alphas_bar_sqrt -= alphas_bar_sqrt_T
+
+    # Scale so the first timestep is back to the old value.
+    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+
+    # Convert alphas_bar_sqrt to betas
+    alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
+    alphas = alphas_bar[1:] / alphas_bar[:-1]  # Revert cumprod
+    alphas = torch.cat([alphas_bar[0:1], alphas])
+    betas = 1 - alphas
+
+    return betas
+
+
 class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
     """
     Ancestral sampling with Euler method steps.
@@ -122,6 +159,10 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
             An offset added to the inference steps. You can use a combination of `offset=1` and
             `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
             Diffusion.
+        rescale_betas_zero_snr (`bool`, defaults to `False`):
+            Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+            dark samples instead of limiting it to samples with medium brightness. Loosely related to
+            [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
     """
 
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -138,6 +179,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         prediction_type: str = "epsilon",
         timestep_spacing: str = "linspace",
         steps_offset: int = 0,
+        rescale_betas_zero_snr: bool = False,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -145,18 +187,24 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
         else:
             raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
 
+        if rescale_betas_zero_snr:
+            self.betas = rescale_zero_terminal_snr(self.betas)
+
         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
 
+        if rescale_betas_zero_snr:
+            # Close to 0 without being 0 so first sigma is not inf
+            # FP16 smallest positive subnormal works well here
+            self.alphas_cumprod[-1] = 2**-24
+
         sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
         sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
         self.sigmas = torch.from_numpy(sigmas)
@@ -168,6 +216,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.is_scale_input_called = False
 
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     @property
     def init_noise_sigma(self):
@@ -251,6 +300,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
 
         self.timesteps = torch.from_numpy(timesteps).to(device=device)
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
 
     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
     def _init_step_index(self, timestep):
@@ -327,6 +377,9 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
 
         sigma = self.sigmas[self.step_index]
 
+        # Upcast to avoid precision issues when computing prev_sample
+        sample = sample.to(torch.float32)
+
         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
         if self.config.prediction_type == "epsilon":
             pred_original_sample = sample - sigma * model_output
@@ -357,6 +410,9 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
 
         prev_sample = prev_sample + noise * sigma_up
 
+        # Cast sample back to model compatible dtype
+        prev_sample = prev_sample.to(model_output.dtype)
+
         # upon completion increase step index by one
         self._step_index += 1
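Taken together, the EulerAncestralDiscreteScheduler changes (beta rescaling, the 2**-24 clamp so the first sigma stays finite, and the float32 upcast inside step) hang off one new config flag. A usage sketch (the checkpoint id is an assumption; from_config accepts keyword overrides):

    import torch
    from diffusers import EulerAncestralDiscreteScheduler, StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    )
    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
        pipe.scheduler.config, rescale_betas_zero_snr=True
    )
    # step() upcasts samples to float32 internally, so fp16 weights remain usable
    image = pipe("a pitch-black night sky with faint stars").images[0]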