diffusers-0.23.0-py3-none-any.whl → diffusers-0.24.0-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- diffusers/__init__.py +16 -2
- diffusers/configuration_utils.py +1 -0
- diffusers/dependency_versions_check.py +1 -14
- diffusers/dependency_versions_table.py +5 -4
- diffusers/image_processor.py +186 -14
- diffusers/loaders/__init__.py +82 -0
- diffusers/loaders/ip_adapter.py +157 -0
- diffusers/loaders/lora.py +1415 -0
- diffusers/loaders/lora_conversion_utils.py +284 -0
- diffusers/loaders/single_file.py +631 -0
- diffusers/loaders/textual_inversion.py +459 -0
- diffusers/loaders/unet.py +735 -0
- diffusers/loaders/utils.py +59 -0
- diffusers/models/__init__.py +12 -1
- diffusers/models/attention.py +165 -14
- diffusers/models/attention_flax.py +9 -1
- diffusers/models/attention_processor.py +286 -1
- diffusers/models/autoencoder_asym_kl.py +14 -9
- diffusers/models/autoencoder_kl.py +3 -18
- diffusers/models/autoencoder_kl_temporal_decoder.py +402 -0
- diffusers/models/autoencoder_tiny.py +20 -24
- diffusers/models/consistency_decoder_vae.py +37 -30
- diffusers/models/controlnet.py +59 -39
- diffusers/models/controlnet_flax.py +19 -18
- diffusers/models/embeddings_flax.py +2 -0
- diffusers/models/lora.py +131 -1
- diffusers/models/modeling_flax_utils.py +2 -1
- diffusers/models/modeling_outputs.py +17 -0
- diffusers/models/modeling_utils.py +27 -19
- diffusers/models/normalization.py +2 -2
- diffusers/models/resnet.py +390 -59
- diffusers/models/transformer_2d.py +20 -3
- diffusers/models/transformer_temporal.py +183 -1
- diffusers/models/unet_2d_blocks_flax.py +5 -0
- diffusers/models/unet_2d_condition.py +9 -0
- diffusers/models/unet_2d_condition_flax.py +13 -13
- diffusers/models/unet_3d_blocks.py +957 -173
- diffusers/models/unet_3d_condition.py +16 -8
- diffusers/models/unet_kandi3.py +589 -0
- diffusers/models/unet_motion_model.py +48 -33
- diffusers/models/unet_spatio_temporal_condition.py +489 -0
- diffusers/models/vae.py +63 -13
- diffusers/models/vae_flax.py +7 -0
- diffusers/models/vq_model.py +3 -1
- diffusers/optimization.py +16 -9
- diffusers/pipelines/__init__.py +65 -12
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +93 -23
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +97 -25
- diffusers/pipelines/animatediff/pipeline_animatediff.py +34 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -0
- diffusers/pipelines/auto_pipeline.py +6 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +217 -31
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +101 -32
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +136 -39
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +119 -37
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +196 -35
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +102 -31
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +1 -0
- diffusers/pipelines/ddim/pipeline_ddim.py +1 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +13 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +3 -3
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +1 -1
- diffusers/pipelines/kandinsky3/__init__.py +49 -0
- diffusers/pipelines/kandinsky3/kandinsky3_pipeline.py +452 -0
- diffusers/pipelines/kandinsky3/kandinsky3img2img_pipeline.py +460 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +65 -6
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +55 -3
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +7 -2
- diffusers/pipelines/pipeline_flax_utils.py +4 -2
- diffusers/pipelines/pipeline_utils.py +33 -13
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +196 -36
- diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py +1 -0
- diffusers/pipelines/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -0
- diffusers/pipelines/stable_diffusion/__init__.py +64 -21
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +8 -3
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +18 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +2 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +88 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +8 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +92 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +92 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +1 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +103 -8
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +113 -8
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +115 -9
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -12
- diffusers/pipelines/stable_video_diffusion/__init__.py +58 -0
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +649 -0
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +108 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +109 -14
- diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +1 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +18 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +4 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +872 -0
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +29 -40
- diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -0
- diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -0
- diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +14 -4
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +9 -5
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +2 -2
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +1 -1
- diffusers/schedulers/__init__.py +2 -4
- diffusers/schedulers/deprecated/__init__.py +50 -0
- diffusers/schedulers/{scheduling_karras_ve.py → deprecated/scheduling_karras_ve.py} +4 -4
- diffusers/schedulers/{scheduling_sde_vp.py → deprecated/scheduling_sde_vp.py} +4 -6
- diffusers/schedulers/scheduling_ddim.py +1 -3
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -3
- diffusers/schedulers/scheduling_ddim_parallel.py +1 -3
- diffusers/schedulers/scheduling_ddpm.py +1 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -3
- diffusers/schedulers/scheduling_deis_multistep.py +15 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +15 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +15 -5
- diffusers/schedulers/scheduling_dpmsolver_sde.py +1 -3
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +15 -5
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +1 -3
- diffusers/schedulers/scheduling_euler_discrete.py +40 -13
- diffusers/schedulers/scheduling_heun_discrete.py +15 -5
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +15 -5
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +15 -5
- diffusers/schedulers/scheduling_lcm.py +123 -29
- diffusers/schedulers/scheduling_lms_discrete.py +1 -3
- diffusers/schedulers/scheduling_pndm.py +1 -3
- diffusers/schedulers/scheduling_repaint.py +1 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +15 -5
- diffusers/utils/__init__.py +1 -0
- diffusers/utils/constants.py +11 -6
- diffusers/utils/dummy_pt_objects.py +45 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +60 -0
- diffusers/utils/dynamic_modules_utils.py +4 -4
- diffusers/utils/export_utils.py +8 -3
- diffusers/utils/logging.py +10 -10
- diffusers/utils/outputs.py +5 -5
- diffusers/utils/peft_utils.py +88 -44
- diffusers/utils/torch_utils.py +2 -2
- diffusers/utils/versions.py +117 -0
- {diffusers-0.23.0.dist-info → diffusers-0.24.0.dist-info}/METADATA +83 -64
- {diffusers-0.23.0.dist-info → diffusers-0.24.0.dist-info}/RECORD +176 -157
- {diffusers-0.23.0.dist-info → diffusers-0.24.0.dist-info}/WHEEL +1 -1
- {diffusers-0.23.0.dist-info → diffusers-0.24.0.dist-info}/entry_points.txt +1 -0
- diffusers/loaders.py +0 -3336
- {diffusers-0.23.0.dist-info → diffusers-0.24.0.dist-info}/LICENSE +0 -0
- {diffusers-0.23.0.dist-info → diffusers-0.24.0.dist-info}/top_level.txt +0 -0
@@ -182,9 +182,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
|
182
182
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
183
183
|
elif beta_schedule == "scaled_linear":
|
184
184
|
# this schedule is very specific to the latent diffusion model.
|
185
|
-
self.betas = (
|
186
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
187
|
-
)
|
185
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
188
186
|
elif beta_schedule == "squaredcos_cap_v2":
|
189
187
|
# Glide cosine schedule
|
190
188
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -159,9 +159,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|
159
159
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
160
160
|
elif beta_schedule == "scaled_linear":
|
161
161
|
# this schedule is very specific to the latent diffusion model.
|
162
|
-
self.betas = (
|
163
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
164
|
-
)
|
162
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
165
163
|
elif beta_schedule == "squaredcos_cap_v2":
|
166
164
|
# Glide cosine schedule
|
167
165
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -359,8 +357,20 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|
359
357
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
360
358
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
361
359
|
|
362
|
-
|
363
|
-
|
360
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
361
|
+
# TODO: Add this logic to the other schedulers
|
362
|
+
if hasattr(self.config, "sigma_min"):
|
363
|
+
sigma_min = self.config.sigma_min
|
364
|
+
else:
|
365
|
+
sigma_min = None
|
366
|
+
|
367
|
+
if hasattr(self.config, "sigma_max"):
|
368
|
+
sigma_max = self.config.sigma_max
|
369
|
+
else:
|
370
|
+
sigma_max = None
|
371
|
+
|
372
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
373
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
364
374
|
|
365
375
|
rho = 7.0 # 7.0 is the value used in the paper
|
366
376
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -145,9 +145,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
145
145
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
146
146
|
elif beta_schedule == "scaled_linear":
|
147
147
|
# this schedule is very specific to the latent diffusion model.
|
148
|
-
self.betas = (
|
149
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
150
|
-
)
|
148
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
151
149
|
elif beta_schedule == "squaredcos_cap_v2":
|
152
150
|
# Glide cosine schedule
|
153
151
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -144,7 +144,10 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
144
144
|
prediction_type: str = "epsilon",
|
145
145
|
interpolation_type: str = "linear",
|
146
146
|
use_karras_sigmas: Optional[bool] = False,
|
147
|
+
sigma_min: Optional[float] = None,
|
148
|
+
sigma_max: Optional[float] = None,
|
147
149
|
timestep_spacing: str = "linspace",
|
150
|
+
timestep_type: str = "discrete", # can be "discrete" or "continuous"
|
148
151
|
steps_offset: int = 0,
|
149
152
|
):
|
150
153
|
if trained_betas is not None:
|
@@ -153,9 +156,7 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
153
156
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
154
157
|
elif beta_schedule == "scaled_linear":
|
155
158
|
# this schedule is very specific to the latent diffusion model.
|
156
|
-
self.betas = (
|
157
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
158
|
-
)
|
159
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
159
160
|
elif beta_schedule == "squaredcos_cap_v2":
|
160
161
|
# Glide cosine schedule
|
161
162
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -166,13 +167,22 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
166
167
|
self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
|
167
168
|
|
168
169
|
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
169
|
-
|
170
|
-
|
170
|
+
timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=float)[::-1].copy()
|
171
|
+
|
172
|
+
sigmas = torch.from_numpy(sigmas[::-1].copy()).to(dtype=torch.float32)
|
173
|
+
timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32)
|
171
174
|
|
172
175
|
# setable values
|
173
176
|
self.num_inference_steps = None
|
174
|
-
|
175
|
-
|
177
|
+
|
178
|
+
# TODO: Support the full EDM scalings for all prediction types and timestep types
|
179
|
+
if timestep_type == "continuous" and prediction_type == "v_prediction":
|
180
|
+
self.timesteps = torch.Tensor([0.25 * sigma.log() for sigma in sigmas])
|
181
|
+
else:
|
182
|
+
self.timesteps = timesteps
|
183
|
+
|
184
|
+
self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])
|
185
|
+
|
176
186
|
self.is_scale_input_called = False
|
177
187
|
self.use_karras_sigmas = use_karras_sigmas
|
178
188
|
|
@@ -270,10 +280,15 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
270
280
|
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
271
281
|
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
272
282
|
|
273
|
-
sigmas =
|
274
|
-
|
283
|
+
sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device)
|
284
|
+
|
285
|
+
# TODO: Support the full EDM scalings for all prediction types and timestep types
|
286
|
+
if self.config.timestep_type == "continuous" and self.config.prediction_type == "v_prediction":
|
287
|
+
self.timesteps = torch.Tensor([0.25 * sigma.log() for sigma in sigmas]).to(device=device)
|
288
|
+
else:
|
289
|
+
self.timesteps = torch.from_numpy(timesteps.astype(np.float32)).to(device=device)
|
275
290
|
|
276
|
-
self.
|
291
|
+
self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])
|
277
292
|
self._step_index = None
|
278
293
|
|
279
294
|
def _sigma_to_t(self, sigma, log_sigmas):
|
@@ -303,8 +318,20 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
303
318
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
304
319
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
305
320
|
|
306
|
-
|
307
|
-
|
321
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
322
|
+
# TODO: Add this logic to the other schedulers
|
323
|
+
if hasattr(self.config, "sigma_min"):
|
324
|
+
sigma_min = self.config.sigma_min
|
325
|
+
else:
|
326
|
+
sigma_min = None
|
327
|
+
|
328
|
+
if hasattr(self.config, "sigma_max"):
|
329
|
+
sigma_max = self.config.sigma_max
|
330
|
+
else:
|
331
|
+
sigma_max = None
|
332
|
+
|
333
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
334
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
308
335
|
|
309
336
|
rho = 7.0 # 7.0 is the value used in the paper
|
310
337
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -414,7 +441,7 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
414
441
|
elif self.config.prediction_type == "epsilon":
|
415
442
|
pred_original_sample = sample - sigma_hat * model_output
|
416
443
|
elif self.config.prediction_type == "v_prediction":
|
417
|
-
# * c_out + input * c_skip
|
444
|
+
# denoised = model_output * c_out + input * c_skip
|
418
445
|
pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
|
419
446
|
else:
|
420
447
|
raise ValueError(
|
@@ -131,9 +131,7 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
131
131
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
132
132
|
elif beta_schedule == "scaled_linear":
|
133
133
|
# this schedule is very specific to the latent diffusion model.
|
134
|
-
self.betas = (
|
135
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
136
|
-
)
|
134
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
137
135
|
elif beta_schedule == "squaredcos_cap_v2":
|
138
136
|
# Glide cosine schedule
|
139
137
|
self.betas = betas_for_alpha_bar(num_train_timesteps, alpha_transform_type="cosine")
|
@@ -305,8 +303,20 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
305
303
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
306
304
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
307
305
|
|
308
|
-
|
309
|
-
|
306
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
307
|
+
# TODO: Add this logic to the other schedulers
|
308
|
+
if hasattr(self.config, "sigma_min"):
|
309
|
+
sigma_min = self.config.sigma_min
|
310
|
+
else:
|
311
|
+
sigma_min = None
|
312
|
+
|
313
|
+
if hasattr(self.config, "sigma_max"):
|
314
|
+
sigma_max = self.config.sigma_max
|
315
|
+
else:
|
316
|
+
sigma_max = None
|
317
|
+
|
318
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
319
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
310
320
|
|
311
321
|
rho = 7.0 # 7.0 is the value used in the paper
|
312
322
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -127,9 +127,7 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
127
127
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
128
128
|
elif beta_schedule == "scaled_linear":
|
129
129
|
# this schedule is very specific to the latent diffusion model.
|
130
|
-
self.betas = (
|
131
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
132
|
-
)
|
130
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
133
131
|
elif beta_schedule == "squaredcos_cap_v2":
|
134
132
|
# Glide cosine schedule
|
135
133
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -326,8 +324,20 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
326
324
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
327
325
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
328
326
|
|
329
|
-
|
330
|
-
|
327
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
328
|
+
# TODO: Add this logic to the other schedulers
|
329
|
+
if hasattr(self.config, "sigma_min"):
|
330
|
+
sigma_min = self.config.sigma_min
|
331
|
+
else:
|
332
|
+
sigma_min = None
|
333
|
+
|
334
|
+
if hasattr(self.config, "sigma_max"):
|
335
|
+
sigma_max = self.config.sigma_max
|
336
|
+
else:
|
337
|
+
sigma_max = None
|
338
|
+
|
339
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
340
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
331
341
|
|
332
342
|
rho = 7.0 # 7.0 is the value used in the paper
|
333
343
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -126,9 +126,7 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
126
126
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
127
127
|
elif beta_schedule == "scaled_linear":
|
128
128
|
# this schedule is very specific to the latent diffusion model.
|
129
|
-
self.betas = (
|
130
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
131
|
-
)
|
129
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
132
130
|
elif beta_schedule == "squaredcos_cap_v2":
|
133
131
|
# Glide cosine schedule
|
134
132
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -337,8 +335,20 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
337
335
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
338
336
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
339
337
|
|
340
|
-
|
341
|
-
|
338
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
339
|
+
# TODO: Add this logic to the other schedulers
|
340
|
+
if hasattr(self.config, "sigma_min"):
|
341
|
+
sigma_min = self.config.sigma_min
|
342
|
+
else:
|
343
|
+
sigma_min = None
|
344
|
+
|
345
|
+
if hasattr(self.config, "sigma_max"):
|
346
|
+
sigma_max = self.config.sigma_max
|
347
|
+
else:
|
348
|
+
sigma_max = None
|
349
|
+
|
350
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
351
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
342
352
|
|
343
353
|
rho = 7.0 # 7.0 is the value used in the paper
|
344
354
|
ramp = np.linspace(0, 1, num_inference_steps)
|
@@ -221,9 +221,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
221
221
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
222
222
|
elif beta_schedule == "scaled_linear":
|
223
223
|
# this schedule is very specific to the latent diffusion model.
|
224
|
-
self.betas = (
|
225
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
226
|
-
)
|
224
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
227
225
|
elif beta_schedule == "squaredcos_cap_v2":
|
228
226
|
# Glide cosine schedule
|
229
227
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -249,6 +247,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
249
247
|
# setable values
|
250
248
|
self.num_inference_steps = None
|
251
249
|
self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
|
250
|
+
self.custom_timesteps = False
|
252
251
|
|
253
252
|
self._step_index = None
|
254
253
|
|
@@ -326,17 +325,19 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
326
325
|
|
327
326
|
def set_timesteps(
|
328
327
|
self,
|
329
|
-
num_inference_steps: int,
|
328
|
+
num_inference_steps: Optional[int] = None,
|
330
329
|
device: Union[str, torch.device] = None,
|
331
330
|
original_inference_steps: Optional[int] = None,
|
331
|
+
timesteps: Optional[List[int]] = None,
|
332
332
|
strength: int = 1.0,
|
333
333
|
):
|
334
334
|
"""
|
335
335
|
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
336
336
|
|
337
337
|
Args:
|
338
|
-
num_inference_steps (`int`):
|
339
|
-
The number of diffusion steps used when generating samples with a pre-trained model.
|
338
|
+
num_inference_steps (`int`, *optional*):
|
339
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
340
|
+
`timesteps` must be `None`.
|
340
341
|
device (`str` or `torch.device`, *optional*):
|
341
342
|
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
342
343
|
original_inference_steps (`int`, *optional*):
|
@@ -344,16 +345,19 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
344
345
|
schedule (which is different from the standard `diffusers` implementation). We will then take
|
345
346
|
`num_inference_steps` timesteps from this schedule, evenly spaced in terms of indices, and use that as
|
346
347
|
our final timestep schedule. If not set, this will default to the `original_inference_steps` attribute.
|
348
|
+
timesteps (`List[int]`, *optional*):
|
349
|
+
Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
|
350
|
+
timestep spacing strategy of equal spacing between timesteps on the training/distillation timestep
|
351
|
+
schedule is used. If `timesteps` is passed, `num_inference_steps` must be `None`.
|
347
352
|
"""
|
353
|
+
# 0. Check inputs
|
354
|
+
if num_inference_steps is None and timesteps is None:
|
355
|
+
raise ValueError("Must pass exactly one of `num_inference_steps` or `custom_timesteps`.")
|
348
356
|
|
349
|
-
if num_inference_steps > self.config.num_train_timesteps:
|
350
|
-
raise ValueError(
|
351
|
-
f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
|
352
|
-
f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
|
353
|
-
f" maximal {self.config.num_train_timesteps} timesteps."
|
354
|
-
)
|
357
|
+
if num_inference_steps is not None and timesteps is not None:
|
358
|
+
raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.")
|
355
359
|
|
356
|
-
|
360
|
+
# 1. Calculate the LCM original training/distillation timestep schedule.
|
357
361
|
original_steps = (
|
358
362
|
original_inference_steps if original_inference_steps is not None else self.config.original_inference_steps
|
359
363
|
)
|
@@ -365,23 +369,97 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
365
369
|
f" maximal {self.config.num_train_timesteps} timesteps."
|
366
370
|
)
|
367
371
|
|
368
|
-
if num_inference_steps > original_steps:
|
369
|
-
raise ValueError(
|
370
|
-
f"`num_inference_steps`: {num_inference_steps} cannot be larger than `original_inference_steps`:"
|
371
|
-
f" {original_steps} because the final timestep schedule will be a subset of the"
|
372
|
-
f" `original_inference_steps`-sized initial timestep schedule."
|
373
|
-
)
|
374
|
-
|
375
372
|
# LCM Timesteps Setting
|
376
|
-
#
|
377
|
-
|
378
|
-
# LCM Training Steps Schedule
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
373
|
+
# The skipping step parameter k from the paper.
|
374
|
+
k = self.config.num_train_timesteps // original_steps
|
375
|
+
# LCM Training/Distillation Steps Schedule
|
376
|
+
# Currently, only a linearly-spaced schedule is supported (same as in the LCM distillation scripts).
|
377
|
+
lcm_origin_timesteps = np.asarray(list(range(1, int(original_steps * strength) + 1))) * k - 1
|
378
|
+
|
379
|
+
# 2. Calculate the LCM inference timestep schedule.
|
380
|
+
if timesteps is not None:
|
381
|
+
# 2.1 Handle custom timestep schedules.
|
382
|
+
train_timesteps = set(lcm_origin_timesteps)
|
383
|
+
non_train_timesteps = []
|
384
|
+
for i in range(1, len(timesteps)):
|
385
|
+
if timesteps[i] >= timesteps[i - 1]:
|
386
|
+
raise ValueError("`custom_timesteps` must be in descending order.")
|
387
|
+
|
388
|
+
if timesteps[i] not in train_timesteps:
|
389
|
+
non_train_timesteps.append(timesteps[i])
|
390
|
+
|
391
|
+
if timesteps[0] >= self.config.num_train_timesteps:
|
392
|
+
raise ValueError(
|
393
|
+
f"`timesteps` must start before `self.config.train_timesteps`:"
|
394
|
+
f" {self.config.num_train_timesteps}."
|
395
|
+
)
|
396
|
+
|
397
|
+
# Raise warning if timestep schedule does not start with self.config.num_train_timesteps - 1
|
398
|
+
if strength == 1.0 and timesteps[0] != self.config.num_train_timesteps - 1:
|
399
|
+
logger.warning(
|
400
|
+
f"The first timestep on the custom timestep schedule is {timesteps[0]}, not"
|
401
|
+
f" `self.config.num_train_timesteps - 1`: {self.config.num_train_timesteps - 1}. You may get"
|
402
|
+
f" unexpected results when using this timestep schedule."
|
403
|
+
)
|
404
|
+
|
405
|
+
# Raise warning if custom timestep schedule contains timesteps not on original timestep schedule
|
406
|
+
if non_train_timesteps:
|
407
|
+
logger.warning(
|
408
|
+
f"The custom timestep schedule contains the following timesteps which are not on the original"
|
409
|
+
f" training/distillation timestep schedule: {non_train_timesteps}. You may get unexpected results"
|
410
|
+
f" when using this timestep schedule."
|
411
|
+
)
|
412
|
+
|
413
|
+
# Raise warning if custom timestep schedule is longer than original_steps
|
414
|
+
if len(timesteps) > original_steps:
|
415
|
+
logger.warning(
|
416
|
+
f"The number of timesteps in the custom timestep schedule is {len(timesteps)}, which exceeds the"
|
417
|
+
f" the length of the timestep schedule used for training: {original_steps}. You may get some"
|
418
|
+
f" unexpected results when using this timestep schedule."
|
419
|
+
)
|
420
|
+
|
421
|
+
timesteps = np.array(timesteps, dtype=np.int64)
|
422
|
+
self.num_inference_steps = len(timesteps)
|
423
|
+
self.custom_timesteps = True
|
424
|
+
|
425
|
+
# Apply strength (e.g. for img2img pipelines) (see StableDiffusionImg2ImgPipeline.get_timesteps)
|
426
|
+
init_timestep = min(int(self.num_inference_steps * strength), self.num_inference_steps)
|
427
|
+
t_start = max(self.num_inference_steps - init_timestep, 0)
|
428
|
+
timesteps = timesteps[t_start * self.order :]
|
429
|
+
# TODO: also reset self.num_inference_steps?
|
430
|
+
else:
|
431
|
+
# 2.2 Create the "standard" LCM inference timestep schedule.
|
432
|
+
if num_inference_steps > self.config.num_train_timesteps:
|
433
|
+
raise ValueError(
|
434
|
+
f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
|
435
|
+
f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
|
436
|
+
f" maximal {self.config.num_train_timesteps} timesteps."
|
437
|
+
)
|
438
|
+
|
439
|
+
skipping_step = len(lcm_origin_timesteps) // num_inference_steps
|
440
|
+
|
441
|
+
if skipping_step < 1:
|
442
|
+
raise ValueError(
|
443
|
+
f"The combination of `original_steps x strength`: {original_steps} x {strength} is smaller than `num_inference_steps`: {num_inference_steps}. Make sure to either reduce `num_inference_steps` to a value smaller than {int(original_steps * strength)} or increase `strength` to a value higher than {float(num_inference_steps / original_steps)}."
|
444
|
+
)
|
445
|
+
|
446
|
+
self.num_inference_steps = num_inference_steps
|
447
|
+
|
448
|
+
if num_inference_steps > original_steps:
|
449
|
+
raise ValueError(
|
450
|
+
f"`num_inference_steps`: {num_inference_steps} cannot be larger than `original_inference_steps`:"
|
451
|
+
f" {original_steps} because the final timestep schedule will be a subset of the"
|
452
|
+
f" `original_inference_steps`-sized initial timestep schedule."
|
453
|
+
)
|
454
|
+
|
455
|
+
# LCM Inference Steps Schedule
|
456
|
+
lcm_origin_timesteps = lcm_origin_timesteps[::-1].copy()
|
457
|
+
# Select (approximately) evenly spaced indices from lcm_origin_timesteps.
|
458
|
+
inference_indices = np.linspace(0, len(lcm_origin_timesteps), num=num_inference_steps, endpoint=False)
|
459
|
+
inference_indices = np.floor(inference_indices).astype(np.int64)
|
460
|
+
timesteps = lcm_origin_timesteps[inference_indices]
|
461
|
+
|
462
|
+
self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.long)
|
385
463
|
|
386
464
|
self._step_index = None
|
387
465
|
|
@@ -536,3 +614,19 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
536
614
|
|
537
615
|
def __len__(self):
|
538
616
|
return self.config.num_train_timesteps
|
617
|
+
|
618
|
+
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
|
619
|
+
def previous_timestep(self, timestep):
|
620
|
+
if self.custom_timesteps:
|
621
|
+
index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
|
622
|
+
if index == self.timesteps.shape[0] - 1:
|
623
|
+
prev_t = torch.tensor(-1)
|
624
|
+
else:
|
625
|
+
prev_t = self.timesteps[index + 1]
|
626
|
+
else:
|
627
|
+
num_inference_steps = (
|
628
|
+
self.num_inference_steps if self.num_inference_steps else self.config.num_train_timesteps
|
629
|
+
)
|
630
|
+
prev_t = timestep - self.config.num_train_timesteps // num_inference_steps
|
631
|
+
|
632
|
+
return prev_t
|
@@ -146,9 +146,7 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
146
146
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
147
147
|
elif beta_schedule == "scaled_linear":
|
148
148
|
# this schedule is very specific to the latent diffusion model.
|
149
|
-
self.betas = (
|
150
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
151
|
-
)
|
149
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
152
150
|
elif beta_schedule == "squaredcos_cap_v2":
|
153
151
|
# Glide cosine schedule
|
154
152
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -132,9 +132,7 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
132
132
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
133
133
|
elif beta_schedule == "scaled_linear":
|
134
134
|
# this schedule is very specific to the latent diffusion model.
|
135
|
-
self.betas = (
|
136
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
137
|
-
)
|
135
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
138
136
|
elif beta_schedule == "squaredcos_cap_v2":
|
139
137
|
# Glide cosine schedule
|
140
138
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -134,9 +134,7 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):
|
|
134
134
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
135
135
|
elif beta_schedule == "scaled_linear":
|
136
136
|
# this schedule is very specific to the latent diffusion model.
|
137
|
-
self.betas = (
|
138
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
139
|
-
)
|
137
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
140
138
|
elif beta_schedule == "squaredcos_cap_v2":
|
141
139
|
# Glide cosine schedule
|
142
140
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -162,9 +162,7 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
162
162
|
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
163
163
|
elif beta_schedule == "scaled_linear":
|
164
164
|
# this schedule is very specific to the latent diffusion model.
|
165
|
-
self.betas = (
|
166
|
-
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
167
|
-
)
|
165
|
+
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
168
166
|
elif beta_schedule == "squaredcos_cap_v2":
|
169
167
|
# Glide cosine schedule
|
170
168
|
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
@@ -339,8 +337,20 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
339
337
|
def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
|
340
338
|
"""Constructs the noise schedule of Karras et al. (2022)."""
|
341
339
|
|
342
|
-
|
343
|
-
|
340
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
341
|
+
# TODO: Add this logic to the other schedulers
|
342
|
+
if hasattr(self.config, "sigma_min"):
|
343
|
+
sigma_min = self.config.sigma_min
|
344
|
+
else:
|
345
|
+
sigma_min = None
|
346
|
+
|
347
|
+
if hasattr(self.config, "sigma_max"):
|
348
|
+
sigma_max = self.config.sigma_max
|
349
|
+
else:
|
350
|
+
sigma_max = None
|
351
|
+
|
352
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
353
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
344
354
|
|
345
355
|
rho = 7.0 # 7.0 is the value used in the paper
|
346
356
|
ramp = np.linspace(0, 1, num_inference_steps)
|
diffusers/utils/__init__.py
CHANGED
diffusers/utils/constants.py
CHANGED
@@ -17,13 +17,15 @@ import os
|
|
17
17
|
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home
|
18
18
|
from packaging import version
|
19
19
|
|
20
|
-
from
|
20
|
+
from ..dependency_versions_check import dep_version_check
|
21
|
+
from .import_utils import ENV_VARS_TRUE_VALUES, is_peft_available, is_transformers_available
|
21
22
|
|
22
23
|
|
23
24
|
default_cache_path = HUGGINGFACE_HUB_CACHE
|
24
25
|
|
25
|
-
MIN_PEFT_VERSION = "0.
|
26
|
-
MIN_TRANSFORMERS_VERSION = "4.
|
26
|
+
MIN_PEFT_VERSION = "0.6.0"
|
27
|
+
MIN_TRANSFORMERS_VERSION = "4.34.0"
|
28
|
+
_CHECK_PEFT = os.environ.get("_CHECK_PEFT", "1") in ENV_VARS_TRUE_VALUES
|
27
29
|
|
28
30
|
|
29
31
|
CONFIG_NAME = "config.json"
|
@@ -41,12 +43,15 @@ DEPRECATED_REVISION_ARGS = ["fp16", "non-ema"]
|
|
41
43
|
# Below should be `True` if the current version of `peft` and `transformers` are compatible with
|
42
44
|
# PEFT backend. Will automatically fall back to PEFT backend if the correct versions of the libraries are
|
43
45
|
# available.
|
44
|
-
# For PEFT it is has to be greater than 0.6.0 and for transformers it has to be greater than 4.
|
46
|
+
# For PEFT it has to be greater than or equal to 0.6.0 and for transformers it has to be greater than or equal to 4.34.0.
|
45
47
|
_required_peft_version = is_peft_available() and version.parse(
|
46
48
|
version.parse(importlib.metadata.version("peft")).base_version
|
47
|
-
)
|
49
|
+
) >= version.parse(MIN_PEFT_VERSION)
|
48
50
|
_required_transformers_version = is_transformers_available() and version.parse(
|
49
51
|
version.parse(importlib.metadata.version("transformers")).base_version
|
50
|
-
)
|
52
|
+
) >= version.parse(MIN_TRANSFORMERS_VERSION)
|
51
53
|
|
52
54
|
USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
|
55
|
+
|
56
|
+
if USE_PEFT_BACKEND and _CHECK_PEFT:
|
57
|
+
dep_version_check("peft")
|
@@ -32,6 +32,21 @@ class AutoencoderKL(metaclass=DummyObject):
|
|
32
32
|
requires_backends(cls, ["torch"])
|
33
33
|
|
34
34
|
|
35
|
+
class AutoencoderKLTemporalDecoder(metaclass=DummyObject):
|
36
|
+
_backends = ["torch"]
|
37
|
+
|
38
|
+
def __init__(self, *args, **kwargs):
|
39
|
+
requires_backends(self, ["torch"])
|
40
|
+
|
41
|
+
@classmethod
|
42
|
+
def from_config(cls, *args, **kwargs):
|
43
|
+
requires_backends(cls, ["torch"])
|
44
|
+
|
45
|
+
@classmethod
|
46
|
+
def from_pretrained(cls, *args, **kwargs):
|
47
|
+
requires_backends(cls, ["torch"])
|
48
|
+
|
49
|
+
|
35
50
|
class AutoencoderTiny(metaclass=DummyObject):
|
36
51
|
_backends = ["torch"]
|
37
52
|
|
@@ -77,6 +92,21 @@ class ControlNetModel(metaclass=DummyObject):
|
|
77
92
|
requires_backends(cls, ["torch"])
|
78
93
|
|
79
94
|
|
95
|
+
class Kandinsky3UNet(metaclass=DummyObject):
|
96
|
+
_backends = ["torch"]
|
97
|
+
|
98
|
+
def __init__(self, *args, **kwargs):
|
99
|
+
requires_backends(self, ["torch"])
|
100
|
+
|
101
|
+
@classmethod
|
102
|
+
def from_config(cls, *args, **kwargs):
|
103
|
+
requires_backends(cls, ["torch"])
|
104
|
+
|
105
|
+
@classmethod
|
106
|
+
def from_pretrained(cls, *args, **kwargs):
|
107
|
+
requires_backends(cls, ["torch"])
|
108
|
+
|
109
|
+
|
80
110
|
class ModelMixin(metaclass=DummyObject):
|
81
111
|
_backends = ["torch"]
|
82
112
|
|
@@ -257,6 +287,21 @@ class UNetMotionModel(metaclass=DummyObject):
|
|
257
287
|
requires_backends(cls, ["torch"])
|
258
288
|
|
259
289
|
|
290
|
+
class UNetSpatioTemporalConditionModel(metaclass=DummyObject):
|
291
|
+
_backends = ["torch"]
|
292
|
+
|
293
|
+
def __init__(self, *args, **kwargs):
|
294
|
+
requires_backends(self, ["torch"])
|
295
|
+
|
296
|
+
@classmethod
|
297
|
+
def from_config(cls, *args, **kwargs):
|
298
|
+
requires_backends(cls, ["torch"])
|
299
|
+
|
300
|
+
@classmethod
|
301
|
+
def from_pretrained(cls, *args, **kwargs):
|
302
|
+
requires_backends(cls, ["torch"])
|
303
|
+
|
304
|
+
|
260
305
|
class VQModel(metaclass=DummyObject):
|
261
306
|
_backends = ["torch"]
|
262
307
|
|