diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- diffusers/__init__.py +26 -2
- diffusers/commands/fp16_safetensors.py +10 -11
- diffusers/configuration_utils.py +13 -8
- diffusers/dependency_versions_check.py +0 -1
- diffusers/dependency_versions_table.py +5 -5
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/image_processor.py +463 -51
- diffusers/loaders/__init__.py +82 -0
- diffusers/loaders/ip_adapter.py +159 -0
- diffusers/loaders/lora.py +1553 -0
- diffusers/loaders/lora_conversion_utils.py +284 -0
- diffusers/loaders/single_file.py +637 -0
- diffusers/loaders/textual_inversion.py +455 -0
- diffusers/loaders/unet.py +828 -0
- diffusers/loaders/utils.py +59 -0
- diffusers/models/__init__.py +26 -9
- diffusers/models/activations.py +9 -6
- diffusers/models/attention.py +301 -29
- diffusers/models/attention_flax.py +9 -1
- diffusers/models/attention_processor.py +378 -6
- diffusers/models/autoencoders/__init__.py +5 -0
- diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +17 -12
- diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +47 -23
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +402 -0
- diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +24 -28
- diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +51 -44
- diffusers/models/{vae.py → autoencoders/vae.py} +71 -17
- diffusers/models/controlnet.py +59 -39
- diffusers/models/controlnet_flax.py +19 -18
- diffusers/models/downsampling.py +338 -0
- diffusers/models/embeddings.py +112 -29
- diffusers/models/embeddings_flax.py +2 -0
- diffusers/models/lora.py +131 -1
- diffusers/models/modeling_flax_utils.py +14 -8
- diffusers/models/modeling_outputs.py +17 -0
- diffusers/models/modeling_utils.py +37 -29
- diffusers/models/normalization.py +110 -4
- diffusers/models/resnet.py +299 -652
- diffusers/models/transformer_2d.py +22 -5
- diffusers/models/transformer_temporal.py +183 -1
- diffusers/models/unet_2d_blocks_flax.py +5 -0
- diffusers/models/unet_2d_condition.py +46 -0
- diffusers/models/unet_2d_condition_flax.py +13 -13
- diffusers/models/unet_3d_blocks.py +957 -173
- diffusers/models/unet_3d_condition.py +16 -8
- diffusers/models/unet_kandinsky3.py +535 -0
- diffusers/models/unet_motion_model.py +48 -33
- diffusers/models/unet_spatio_temporal_condition.py +489 -0
- diffusers/models/upsampling.py +454 -0
- diffusers/models/uvit_2d.py +471 -0
- diffusers/models/vae_flax.py +7 -0
- diffusers/models/vq_model.py +12 -3
- diffusers/optimization.py +16 -9
- diffusers/pipelines/__init__.py +137 -76
- diffusers/pipelines/amused/__init__.py +62 -0
- diffusers/pipelines/amused/pipeline_amused.py +328 -0
- diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +66 -8
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -0
- diffusers/pipelines/auto_pipeline.py +23 -13
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +238 -35
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +148 -37
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +155 -41
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +123 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +216 -39
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +106 -34
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +1 -0
- diffusers/pipelines/ddim/pipeline_ddim.py +1 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +13 -1
- diffusers/pipelines/deprecated/__init__.py +153 -0
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +177 -34
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +182 -37
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
- diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
- diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
- diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
- diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
- diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
- diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
- diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
- diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +5 -4
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
- diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +8 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +34 -13
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +7 -6
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +12 -11
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +17 -11
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +11 -10
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +14 -13
- diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
- diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +83 -51
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +7 -6
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +7 -6
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +7 -6
- diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +1 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +3 -3
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +1 -1
- diffusers/pipelines/kandinsky3/__init__.py +49 -0
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +589 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +654 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +111 -11
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +102 -9
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +1 -1
- diffusers/pipelines/onnx_utils.py +8 -5
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +7 -2
- diffusers/pipelines/pipeline_flax_utils.py +11 -8
- diffusers/pipelines/pipeline_utils.py +63 -42
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +247 -38
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/__init__.py +37 -65
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +75 -78
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +2 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +174 -11
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +8 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +178 -11
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +224 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +74 -20
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +7 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +6 -2
- diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +3 -3
- diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +3 -2
- diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +4 -3
- diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +7 -1
- diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +51 -7
- diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +57 -8
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
- diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +68 -10
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +194 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +205 -16
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +206 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +23 -17
- diffusers/pipelines/stable_video_diffusion/__init__.py +58 -0
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +652 -0
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +108 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +115 -14
- diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +6 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +23 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +334 -10
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +1331 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +14 -4
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +9 -5
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +2 -2
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -1
- diffusers/schedulers/__init__.py +4 -4
- diffusers/schedulers/deprecated/__init__.py +50 -0
- diffusers/schedulers/{scheduling_karras_ve.py → deprecated/scheduling_karras_ve.py} +4 -4
- diffusers/schedulers/{scheduling_sde_vp.py → deprecated/scheduling_sde_vp.py} +4 -6
- diffusers/schedulers/scheduling_amused.py +162 -0
- diffusers/schedulers/scheduling_consistency_models.py +2 -0
- diffusers/schedulers/scheduling_ddim.py +1 -3
- diffusers/schedulers/scheduling_ddim_inverse.py +2 -7
- diffusers/schedulers/scheduling_ddim_parallel.py +1 -3
- diffusers/schedulers/scheduling_ddpm.py +47 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +47 -3
- diffusers/schedulers/scheduling_deis_multistep.py +28 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +28 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +28 -6
- diffusers/schedulers/scheduling_dpmsolver_sde.py +3 -3
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +28 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +59 -3
- diffusers/schedulers/scheduling_euler_discrete.py +102 -16
- diffusers/schedulers/scheduling_heun_discrete.py +17 -5
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +17 -5
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +17 -5
- diffusers/schedulers/scheduling_lcm.py +123 -29
- diffusers/schedulers/scheduling_lms_discrete.py +3 -3
- diffusers/schedulers/scheduling_pndm.py +1 -3
- diffusers/schedulers/scheduling_repaint.py +1 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +28 -6
- diffusers/schedulers/scheduling_utils.py +3 -1
- diffusers/schedulers/scheduling_utils_flax.py +3 -1
- diffusers/training_utils.py +1 -1
- diffusers/utils/__init__.py +1 -2
- diffusers/utils/constants.py +10 -12
- diffusers/utils/dummy_pt_objects.py +75 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
- diffusers/utils/dynamic_modules_utils.py +18 -22
- diffusers/utils/export_utils.py +8 -3
- diffusers/utils/hub_utils.py +24 -36
- diffusers/utils/logging.py +11 -11
- diffusers/utils/outputs.py +5 -5
- diffusers/utils/peft_utils.py +88 -44
- diffusers/utils/state_dict_utils.py +8 -0
- diffusers/utils/testing_utils.py +199 -1
- diffusers/utils/torch_utils.py +4 -4
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/METADATA +86 -69
- diffusers-0.25.0.dist-info/RECORD +360 -0
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
- diffusers/loaders.py +0 -3336
- diffusers-0.23.1.dist-info/RECORD +0 -323
- /diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
@@ -91,6 +91,43 @@ def betas_for_alpha_bar(
|
|
91
91
|
return torch.tensor(betas, dtype=torch.float32)
|
92
92
|
|
93
93
|
|
94
|
+
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
95
|
+
def rescale_zero_terminal_snr(betas):
|
96
|
+
"""
|
97
|
+
Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
|
98
|
+
|
99
|
+
|
100
|
+
Args:
|
101
|
+
betas (`torch.FloatTensor`):
|
102
|
+
the betas that the scheduler is being initialized with.
|
103
|
+
|
104
|
+
Returns:
|
105
|
+
`torch.FloatTensor`: rescaled betas with zero terminal SNR
|
106
|
+
"""
|
107
|
+
# Convert betas to alphas_bar_sqrt
|
108
|
+
alphas = 1.0 - betas
|
109
|
+
alphas_cumprod = torch.cumprod(alphas, dim=0)
|
110
|
+
alphas_bar_sqrt = alphas_cumprod.sqrt()
|
111
|
+
|
112
|
+
# Store old values.
|
113
|
+
alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
|
114
|
+
alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
|
115
|
+
|
116
|
+
# Shift so the last timestep is zero.
|
117
|
+
alphas_bar_sqrt -= alphas_bar_sqrt_T
|
118
|
+
|
119
|
+
# Scale so the first timestep is back to the old value.
|
120
|
+
alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
|
121
|
+
|
122
|
+
# Convert alphas_bar_sqrt to betas
|
123
|
+
alphas_bar = alphas_bar_sqrt**2 # Revert sqrt
|
124
|
+
alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod
|
125
|
+
alphas = torch.cat([alphas_bar[0:1], alphas])
|
126
|
+
betas = 1 - alphas
|
127
|
+
|
128
|
+
return betas
|
129
|
+
|
130
|
+
|
94
131
|
class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
95
132
|
"""
|
96
133
|
Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and
|
@@ -139,6 +176,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
139
176
|
an offset added to the inference steps. You can use a combination of `offset=1` and
|
140
177
|
`set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
|
141
178
|
stable diffusion.
|
179
|
+
rescale_betas_zero_snr (`bool`, defaults to `False`):
|
180
|
+
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
|
181
|
+
dark samples instead of limiting it to samples with medium brightness. Loosely related to
|
182
|
+
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
|
142
183
|
"""
|
143
184
|
|
144
185
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -163,6 +204,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
163
204
|
sample_max_value: float = 1.0,
|
164
205
|
timestep_spacing: str = "leading",
|
165
206
|
steps_offset: int = 0,
|
207
|
+
rescale_betas_zero_snr: int = False,
|
166
208
|
):
|
167
209
|
if trained_betas is not None:
|
168
210
|
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
@@ -170,9 +212,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
170
212
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
171
213
|
elif beta_schedule == "scaled_linear":
|
172
214
|
# this schedule is very specific to the latent diffusion model.
|
173
|
-
self.betas = (
|
174
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
175
|
-
)
|
215
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
176
216
|
elif beta_schedule == "squaredcos_cap_v2":
|
177
217
|
# Glide cosine schedule
|
178
218
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -183,6 +223,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
183
223
|
else:
|
184
224
|
raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
|
185
225
|
|
226
|
+
# Rescale for zero SNR
|
227
|
+
if rescale_betas_zero_snr:
|
228
|
+
self.betas = rescale_zero_terminal_snr(self.betas)
|
229
|
+
|
186
230
|
self.alphas = 1.0 - self.betas
|
187
231
|
self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
|
188
232
|
self.one = torch.tensor(1.0)
|
@@ -149,9 +149,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
149
149
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
150
150
|
elif beta_schedule == "scaled_linear":
|
151
151
|
# this schedule is very specific to the latent diffusion model.
|
152
|
-
self.betas = (
|
153
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
154
|
-
)
|
152
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
155
153
|
elif beta_schedule == "squaredcos_cap_v2":
|
156
154
|
# Glide cosine schedule
|
157
155
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -164,6 +162,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
164
162
|
self.alpha_t = torch.sqrt(self.alphas_cumprod)
|
165
163
|
self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
|
166
164
|
self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
|
165
|
+
self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
|
167
166
|
|
168
167
|
# standard deviation of the initial noise distribution
|
169
168
|
self.init_noise_sigma = 1.0
|
@@ -188,6 +187,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
188
187
|
self.model_outputs = [None] * solver_order
|
189
188
|
self.lower_order_nums = 0
|
190
189
|
self._step_index = None
|
190
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
191
191
|
|
192
192
|
@property
|
193
193
|
def step_index(self):
|
@@ -255,6 +255,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
255
255
|
|
256
256
|
# add an index counter for schedulers that allow duplicated timesteps
|
257
257
|
self._step_index = None
|
258
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
258
259
|
|
259
260
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
|
260
261
|
def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
|
@@ -325,8 +326,20 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
325
326
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
326
327
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
327
328
|
|
328
|
-
|
329
|
-
|
329
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
330
|
+
# TODO: Add this logic to the other schedulers
|
331
|
+
if hasattr(self.config, "sigma_min"):
|
332
|
+
sigma_min = self.config.sigma_min
|
333
|
+
else:
|
334
|
+
sigma_min = None
|
335
|
+
|
336
|
+
if hasattr(self.config, "sigma_max"):
|
337
|
+
sigma_max = self.config.sigma_max
|
338
|
+
else:
|
339
|
+
sigma_max = None
|
340
|
+
|
341
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
342
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
330
343
|
|
331
344
|
rho = 7.0 # 7.0 is the value used in the paper
|
332
345
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -723,7 +736,16 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
723
736
|
schedule_timesteps = self.timesteps.to(original_samples.device)
|
724
737
|
timesteps = timesteps.to(original_samples.device)
|
725
738
|
|
726
|
-
step_indices = [
|
739
|
+
step_indices = []
|
740
|
+
for timestep in timesteps:
|
741
|
+
index_candidates = (schedule_timesteps == timestep).nonzero()
|
742
|
+
if len(index_candidates) == 0:
|
743
|
+
step_index = len(schedule_timesteps) - 1
|
744
|
+
elif len(index_candidates) > 1:
|
745
|
+
step_index = index_candidates[1].item()
|
746
|
+
else:
|
747
|
+
step_index = index_candidates[0].item()
|
748
|
+
step_indices.append(step_index)
|
727
749
|
|
728
750
|
sigma = sigmas[step_indices].flatten()
|
729
751
|
while len(sigma.shape) < len(original_samples.shape):
|
@@ -176,9 +176,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
176
176
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
177
177
|
elif beta_schedule == "scaled_linear":
|
178
178
|
# this schedule is very specific to the latent diffusion model.
|
179
|
-
self.betas = (
|
180
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
181
|
-
)
|
179
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
182
180
|
elif beta_schedule == "squaredcos_cap_v2":
|
183
181
|
# Glide cosine schedule
|
184
182
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -191,6 +189,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
191
189
|
self.alpha_t = torch.sqrt(self.alphas_cumprod)
|
192
190
|
self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
|
193
191
|
self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
|
192
|
+
self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
|
194
193
|
|
195
194
|
# standard deviation of the initial noise distribution
|
196
195
|
self.init_noise_sigma = 1.0
|
@@ -215,6 +214,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
215
214
|
self.model_outputs = [None] * solver_order
|
216
215
|
self.lower_order_nums = 0
|
217
216
|
self._step_index = None
|
217
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
218
218
|
|
219
219
|
@property
|
220
220
|
def step_index(self):
|
@@ -291,6 +291,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
291
291
|
|
292
292
|
# add an index counter for schedulers that allow duplicated timesteps
|
293
293
|
self._step_index = None
|
294
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
294
295
|
|
295
296
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
|
296
297
|
def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
|
@@ -360,8 +361,20 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
360
361
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
361
362
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
362
363
|
|
363
|
-
|
364
|
-
|
364
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
365
|
+
# TODO: Add this logic to the other schedulers
|
366
|
+
if hasattr(self.config, "sigma_min"):
|
367
|
+
sigma_min = self.config.sigma_min
|
368
|
+
else:
|
369
|
+
sigma_min = None
|
370
|
+
|
371
|
+
if hasattr(self.config, "sigma_max"):
|
372
|
+
sigma_max = self.config.sigma_max
|
373
|
+
else:
|
374
|
+
sigma_max = None
|
375
|
+
|
376
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
377
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
365
378
|
|
366
379
|
rho = 7.0 # 7.0 is the value used in the paper
|
367
380
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -885,7 +898,16 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
885
898
|
schedule_timesteps = self.timesteps.to(original_samples.device)
|
886
899
|
timesteps = timesteps.to(original_samples.device)
|
887
900
|
|
888
|
-
step_indices = [
|
901
|
+
step_indices = []
|
902
|
+
for timestep in timesteps:
|
903
|
+
index_candidates = (schedule_timesteps == timestep).nonzero()
|
904
|
+
if len(index_candidates) == 0:
|
905
|
+
step_index = len(schedule_timesteps) - 1
|
906
|
+
elif len(index_candidates) > 1:
|
907
|
+
step_index = index_candidates[1].item()
|
908
|
+
else:
|
909
|
+
step_index = index_candidates[0].item()
|
910
|
+
step_indices.append(step_index)
|
889
911
|
|
890
912
|
sigma = sigmas[step_indices].flatten()
|
891
913
|
while len(sigma.shape) < len(original_samples.shape):
|
@@ -171,9 +171,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
171
171
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
172
172
|
elif beta_schedule == "scaled_linear":
|
173
173
|
# this schedule is very specific to the latent diffusion model.
|
174
|
-
self.betas = (
|
175
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
176
|
-
)
|
174
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
177
175
|
elif beta_schedule == "squaredcos_cap_v2":
|
178
176
|
# Glide cosine schedule
|
179
177
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -186,6 +184,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
186
184
|
self.alpha_t = torch.sqrt(self.alphas_cumprod)
|
187
185
|
self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
|
188
186
|
self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
|
187
|
+
self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
|
189
188
|
|
190
189
|
# standard deviation of the initial noise distribution
|
191
190
|
self.init_noise_sigma = 1.0
|
@@ -210,6 +209,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
210
209
|
self.model_outputs = [None] * solver_order
|
211
210
|
self.lower_order_nums = 0
|
212
211
|
self._step_index = None
|
212
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
213
213
|
self.use_karras_sigmas = use_karras_sigmas
|
214
214
|
|
215
215
|
@property
|
@@ -290,6 +290,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
290
290
|
|
291
291
|
# add an index counter for schedulers that allow duplicated timesteps
|
292
292
|
self._step_index = None
|
293
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
293
294
|
|
294
295
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
|
295
296
|
def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
|
@@ -360,8 +361,20 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
360
361
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
361
362
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
362
363
|
|
363
|
-
|
364
|
-
|
364
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
365
|
+
# TODO: Add this logic to the other schedulers
|
366
|
+
if hasattr(self.config, "sigma_min"):
|
367
|
+
sigma_min = self.config.sigma_min
|
368
|
+
else:
|
369
|
+
sigma_min = None
|
370
|
+
|
371
|
+
if hasattr(self.config, "sigma_max"):
|
372
|
+
sigma_max = self.config.sigma_max
|
373
|
+
else:
|
374
|
+
sigma_max = None
|
375
|
+
|
376
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
377
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
365
378
|
|
366
379
|
rho = 7.0 # 7.0 is the value used in the paper
|
367
380
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -880,7 +893,16 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
880
893
|
schedule_timesteps = self.timesteps.to(original_samples.device)
|
881
894
|
timesteps = timesteps.to(original_samples.device)
|
882
895
|
|
883
|
-
step_indices = [
|
896
|
+
step_indices = []
|
897
|
+
for timestep in timesteps:
|
898
|
+
index_candidates = (schedule_timesteps == timestep).nonzero()
|
899
|
+
if len(index_candidates) == 0:
|
900
|
+
step_index = len(schedule_timesteps) - 1
|
901
|
+
elif len(index_candidates) > 1:
|
902
|
+
step_index = index_candidates[1].item()
|
903
|
+
else:
|
904
|
+
step_index = index_candidates[0].item()
|
905
|
+
step_indices.append(step_index)
|
884
906
|
|
885
907
|
sigma = sigmas[step_indices].flatten()
|
886
908
|
while len(sigma.shape) < len(original_samples.shape):
|
@@ -182,9 +182,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
|
182
182
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
183
183
|
elif beta_schedule == "scaled_linear":
|
184
184
|
# this schedule is very specific to the latent diffusion model.
|
185
|
-
self.betas = (
|
186
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
187
|
-
)
|
185
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
188
186
|
elif beta_schedule == "squaredcos_cap_v2":
|
189
187
|
# Glide cosine schedule
|
190
188
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -200,6 +198,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
|
200
198
|
self.noise_sampler = None
|
201
199
|
self.noise_sampler_seed = noise_sampler_seed
|
202
200
|
self._step_index = None
|
201
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
203
202
|
|
204
203
|
# Copied from diffusers.schedulers.scheduling_heun_discrete.HeunDiscreteScheduler.index_for_timestep
|
205
204
|
def index_for_timestep(self, timestep, schedule_timesteps=None):
|
@@ -349,6 +348,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
|
349
348
|
self.mid_point_sigma = None
|
350
349
|
|
351
350
|
self._step_index = None
|
351
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
352
352
|
self.noise_sampler = None
|
353
353
|
|
354
354
|
# for exp beta schedules, such as the one for `pipeline_shap_e.py`
|
@@ -159,9 +159,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|
159
159
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
160
160
|
elif beta_schedule == "scaled_linear":
|
161
161
|
# this schedule is very specific to the latent diffusion model.
|
162
|
-
self.betas = (
|
163
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
164
|
-
)
|
162
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
165
163
|
elif beta_schedule == "squaredcos_cap_v2":
|
166
164
|
# Glide cosine schedule
|
167
165
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -174,6 +172,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|
174
172
|
self.alpha_t = torch.sqrt(self.alphas_cumprod)
|
175
173
|
self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
|
176
174
|
self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
|
175
|
+
self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
|
177
176
|
|
178
177
|
# standard deviation of the initial noise distribution
|
179
178
|
self.init_noise_sigma = 1.0
|
@@ -198,6 +197,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|
198
197
|
self.sample = None
|
199
198
|
self.order_list = self.get_order_list(num_train_timesteps)
|
200
199
|
self._step_index = None
|
200
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
201
201
|
|
202
202
|
def get_order_list(self, num_inference_steps: int) -> List[int]:
|
203
203
|
"""
|
@@ -289,6 +289,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|
289
289
|
|
290
290
|
# add an index counter for schedulers that allow duplicated timesteps
|
291
291
|
self._step_index = None
|
292
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
292
293
|
|
293
294
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
|
294
295
|
def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
|
@@ -359,8 +360,20 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|
359
360
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
360
361
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
361
362
|
|
362
|
-
|
363
|
-
|
363
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
364
|
+
# TODO: Add this logic to the other schedulers
|
365
|
+
if hasattr(self.config, "sigma_min"):
|
366
|
+
sigma_min = self.config.sigma_min
|
367
|
+
else:
|
368
|
+
sigma_min = None
|
369
|
+
|
370
|
+
if hasattr(self.config, "sigma_max"):
|
371
|
+
sigma_max = self.config.sigma_max
|
372
|
+
else:
|
373
|
+
sigma_max = None
|
374
|
+
|
375
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
376
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
364
377
|
|
365
378
|
rho = 7.0 # 7.0 is the value used in the paper
|
366
379
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -886,7 +899,16 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|
886
899
|
schedule_timesteps = self.timesteps.to(original_samples.device)
|
887
900
|
timesteps = timesteps.to(original_samples.device)
|
888
901
|
|
889
|
-
step_indices = [
|
902
|
+
step_indices = []
|
903
|
+
for timestep in timesteps:
|
904
|
+
index_candidates = (schedule_timesteps == timestep).nonzero()
|
905
|
+
if len(index_candidates) == 0:
|
906
|
+
step_index = len(schedule_timesteps) - 1
|
907
|
+
elif len(index_candidates) > 1:
|
908
|
+
step_index = index_candidates[1].item()
|
909
|
+
else:
|
910
|
+
step_index = index_candidates[0].item()
|
911
|
+
step_indices.append(step_index)
|
890
912
|
|
891
913
|
sigma = sigmas[step_indices].flatten()
|
892
914
|
while len(sigma.shape) < len(original_samples.shape):
|
@@ -92,6 +92,43 @@ def betas_for_alpha_bar(
|
|
92
92
|
return torch.tensor(betas, dtype=torch.float32)
|
93
93
|
|
94
94
|
|
95
|
+
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
96
|
+
def rescale_zero_terminal_snr(betas):
    """
    Rescale a beta schedule so that its last timestep has zero signal-to-noise ratio.

    Implements Algorithm 1 of https://arxiv.org/pdf/2305.08891.pdf: the square roots of the cumulative alpha products
    are shifted so the final entry becomes zero, then scaled so the first entry keeps its original value, and finally
    converted back to per-step betas.

    Args:
        betas (`torch.FloatTensor`):
            The betas that the scheduler is being initialized with. The input tensor is not modified.

    Returns:
        `torch.FloatTensor`: Rescaled betas with zero terminal SNR.
    """
    # sqrt(alpha_bar_t) for every timestep t.
    sqrt_alpha_bar = torch.cumprod(1.0 - betas, dim=0).sqrt()

    # Endpoints of the original curve; nothing below mutates `sqrt_alpha_bar` in place, so plain indexing is enough.
    first_value = sqrt_alpha_bar[0]
    last_value = sqrt_alpha_bar[-1]

    # Shift so the last timestep is zero, then scale so the first timestep returns to its old value.
    scale = first_value / (first_value - last_value)
    sqrt_alpha_bar = (sqrt_alpha_bar - last_value) * scale

    # Undo the sqrt, then undo the cumulative product by dividing consecutive entries.
    alpha_bar = sqrt_alpha_bar**2
    per_step_alphas = torch.cat([alpha_bar[0:1], alpha_bar[1:] / alpha_bar[:-1]])

    return 1 - per_step_alphas
|
130
|
+
|
131
|
+
|
95
132
|
class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
96
133
|
"""
|
97
134
|
Ancestral sampling with Euler method steps.
|
@@ -122,6 +159,10 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
122
159
|
An offset added to the inference steps. You can use a combination of `offset=1` and
|
123
160
|
`set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
|
124
161
|
Diffusion.
|
162
|
+
rescale_betas_zero_snr (`bool`, defaults to `False`):
|
163
|
+
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
|
164
|
+
dark samples instead of limiting it to samples with medium brightness. Loosely related to
|
165
|
+
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
|
125
166
|
"""
|
126
167
|
|
127
168
|
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
@@ -138,6 +179,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
138
179
|
prediction_type: str = "epsilon",
|
139
180
|
timestep_spacing: str = "linspace",
|
140
181
|
steps_offset: int = 0,
|
182
|
+
rescale_betas_zero_snr: bool = False,
|
141
183
|
):
|
142
184
|
if trained_betas is not None:
|
143
185
|
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
@@ -145,18 +187,24 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
145
187
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
146
188
|
elif beta_schedule == "scaled_linear":
|
147
189
|
# this schedule is very specific to the latent diffusion model.
|
148
|
-
self.betas = (
|
149
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
150
|
-
)
|
190
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
151
191
|
elif beta_schedule == "squaredcos_cap_v2":
|
152
192
|
# Glide cosine schedule
|
153
193
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
154
194
|
else:
|
155
195
|
raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
|
156
196
|
|
197
|
+
if rescale_betas_zero_snr:
|
198
|
+
self.betas = rescale_zero_terminal_snr(self.betas)
|
199
|
+
|
157
200
|
self.alphas = 1.0 - self.betas
|
158
201
|
self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
|
159
202
|
|
203
|
+
if rescale_betas_zero_snr:
|
204
|
+
# Close to 0 without being 0 so first sigma is not inf
|
205
|
+
# FP16 smallest positive subnormal works well here
|
206
|
+
self.alphas_cumprod[-1] = 2**-24
|
207
|
+
|
160
208
|
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
161
209
|
sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
|
162
210
|
self.sigmas = torch.from_numpy(sigmas)
|
@@ -168,6 +216,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
168
216
|
self.is_scale_input_called = False
|
169
217
|
|
170
218
|
self._step_index = None
|
219
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
171
220
|
|
172
221
|
@property
|
173
222
|
def init_noise_sigma(self):
|
@@ -251,6 +300,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
251
300
|
|
252
301
|
self.timesteps = torch.from_numpy(timesteps).to(device=device)
|
253
302
|
self._step_index = None
|
303
|
+
self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
254
304
|
|
255
305
|
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
|
256
306
|
def _init_step_index(self, timestep):
|
@@ -327,6 +377,9 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
327
377
|
|
328
378
|
sigma = self.sigmas[self.step_index]
|
329
379
|
|
380
|
+
# Upcast to avoid precision issues when computing prev_sample
|
381
|
+
sample = sample.to(torch.float32)
|
382
|
+
|
330
383
|
# 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
|
331
384
|
if self.config.prediction_type == "epsilon":
|
332
385
|
pred_original_sample = sample - sigma * model_output
|
@@ -357,6 +410,9 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
357
410
|
|
358
411
|
prev_sample = prev_sample + noise * sigma_up
|
359
412
|
|
413
|
+
# Cast sample back to model compatible dtype
|
414
|
+
prev_sample = prev_sample.to(model_output.dtype)
|
415
|
+
|
360
416
|
# upon completion increase step index by one
|
361
417
|
self._step_index += 1
|
362
418
|
|