diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
- diffusers/__init__.py +3 -1
- diffusers/commands/fp16_safetensors.py +2 -7
- diffusers/configuration_utils.py +23 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/loaders.py +62 -64
- diffusers/models/__init__.py +1 -0
- diffusers/models/activations.py +2 -0
- diffusers/models/attention.py +45 -1
- diffusers/models/autoencoder_tiny.py +193 -0
- diffusers/models/controlnet.py +1 -1
- diffusers/models/embeddings.py +56 -0
- diffusers/models/lora.py +0 -6
- diffusers/models/modeling_flax_utils.py +28 -2
- diffusers/models/modeling_utils.py +33 -16
- diffusers/models/transformer_2d.py +26 -9
- diffusers/models/unet_1d.py +2 -2
- diffusers/models/unet_2d_blocks.py +106 -56
- diffusers/models/unet_2d_condition.py +20 -5
- diffusers/models/vae.py +106 -1
- diffusers/pipelines/__init__.py +1 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/auto_pipeline.py +33 -43
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/pipeline_flax_utils.py +41 -4
- diffusers/pipelines/pipeline_utils.py +60 -16
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
- diffusers/schedulers/scheduling_consistency_models.py +70 -57
- diffusers/schedulers/scheduling_ddim.py +76 -71
- diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
- diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
- diffusers/schedulers/scheduling_ddpm.py +68 -67
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
- diffusers/schedulers/scheduling_deis_multistep.py +93 -85
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
- diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
- diffusers/schedulers/scheduling_euler_discrete.py +63 -56
- diffusers/schedulers/scheduling_heun_discrete.py +57 -45
- diffusers/schedulers/scheduling_ipndm.py +27 -22
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
- diffusers/schedulers/scheduling_karras_ve.py +55 -45
- diffusers/schedulers/scheduling_lms_discrete.py +58 -52
- diffusers/schedulers/scheduling_pndm.py +77 -62
- diffusers/schedulers/scheduling_repaint.py +56 -38
- diffusers/schedulers/scheduling_sde_ve.py +62 -50
- diffusers/schedulers/scheduling_sde_vp.py +32 -11
- diffusers/schedulers/scheduling_unclip.py +3 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
- diffusers/schedulers/scheduling_utils.py +41 -35
- diffusers/schedulers/scheduling_utils_flax.py +8 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- diffusers/utils/hub_utils.py +105 -2
- diffusers/utils/import_utils.py +0 -4
- diffusers/utils/pil_utils.py +19 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
- diffusers/models/cross_attention.py +0 -94
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
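Most of the churn in this release is documentation: the scheduler docstrings were rewritten onto a shared template, as the representative hunks below show. A minimal sketch for picking up the new wheel and verifying it:

    # pip install --upgrade "diffusers==0.20.1"
    import diffusers

    print(diffusers.__version__)  # expected: 0.20.1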
diffusers/schedulers/scheduling_euler_discrete.py:

@@ -31,14 +31,14 @@ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 # Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->EulerDiscrete
 class EulerDiscreteSchedulerOutput(BaseOutput):
     """
-    Output class for the scheduler's step function output.
+    Output class for the scheduler's `step` function output.
 
     Args:
         prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
-            Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
         pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
-            The predicted denoised sample (x_{0}) based on the model output from the current timestep.
+            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
             `pred_original_sample` can be used to preview progress or for guidance.
     """
 
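A minimal sketch of the two output fields documented above, with a random tensor standing in for a real UNet call:

    import torch
    from diffusers import EulerDiscreteScheduler

    scheduler = EulerDiscreteScheduler()
    scheduler.set_timesteps(10)

    t = scheduler.timesteps[0]
    sample = torch.randn(1, 4, 64, 64) * scheduler.init_noise_sigma
    model_input = scheduler.scale_model_input(sample, t)
    model_output = torch.randn_like(model_input)  # stand-in for unet(model_input, t).sample

    out = scheduler.step(model_output, t, sample)
    print(out.prev_sample.shape)           # next model input in the denoising loop
    print(out.pred_original_sample.shape)  # preview of the predicted x_0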
@@ -93,42 +93,40 @@ def betas_for_alpha_bar(
 
 class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
     """
-    Euler scheduler
-    k-diffusion implementation by Katherine Crowson:
-    https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51
+    Euler scheduler.
 
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
 
     Args:
-        num_train_timesteps (`int
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        beta_start (`float`, defaults to 0.0001):
+            The starting `beta` value of inference.
+        beta_end (`float`, defaults to 0.02):
+            The final `beta` value.
+        beta_schedule (`str`, defaults to `"linear"`):
+            The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
             `linear` or `scaled_linear`.
-        trained_betas (`np.ndarray`, optional):
-        prediction_type (`str`,
-            https://imagen.research.google/video/paper.pdf)
-        interpolation_type
-            interpolation type to compute intermediate sigmas for the scheduler denoising steps. Should be
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
+        prediction_type (`str`, defaults to `epsilon`, *optional*):
+            Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
+            `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
+            Video](https://imagen.research.google/video/paper.pdf) paper).
+        interpolation_type(`str`, defaults to `"linear"`, *optional*):
+            The interpolation type to compute intermediate sigmas for the scheduler denoising steps. Should be on of
+            `"linear"` or `"log_linear"`.
         use_karras_sigmas (`bool`, *optional*, defaults to `False`):
-            stable diffusion.
+            Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
+            the sigmas are determined according to a sequence of noise levels {σi}.
+        timestep_spacing (`str`, defaults to `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+        steps_offset (`int`, defaults to 0):
+            An offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
+            Diffusion.
     """
 
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
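The new argument list maps one-to-one onto the constructor; a sketch with the documented defaults spelled out (only `use_karras_sigmas` deviates here):

    from diffusers import EulerDiscreteScheduler

    scheduler = EulerDiscreteScheduler(
        num_train_timesteps=1000,
        beta_start=0.0001,
        beta_end=0.02,
        beta_schedule="linear",
        prediction_type="epsilon",
        interpolation_type="linear",
        use_karras_sigmas=True,  # sigmas follow a Karras-style sequence of noise levels
        timestep_spacing="linspace",
        steps_offset=0,
    )
    print(scheduler.config.num_train_timesteps)  # ConfigMixin stores every __init__ argument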
@@ -189,14 +187,18 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
     ) -> torch.FloatTensor:
         """
+        Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
+        current timestep. Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm.
 
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.FloatTensor`):
+                The input sample.
+            timestep (`int`, *optional*):
+                The current timestep in the diffusion chain.
 
         Returns:
-            `torch.FloatTensor`:
+            `torch.FloatTensor`:
+                A scaled input sample.
         """
         if isinstance(timestep, torch.Tensor):
             timestep = timestep.to(self.timesteps.device)
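The scaling rule named in the new docstring is easy to observe: at the first (highest-noise) timestep, `(sigma**2 + 1) ** 0.5` is large, so the scaled sample shrinks. A quick check:

    import torch
    from diffusers import EulerDiscreteScheduler

    scheduler = EulerDiscreteScheduler()
    scheduler.set_timesteps(25)

    sample = torch.randn(1, 4, 32, 32)
    t = scheduler.timesteps[0]  # highest-noise timestep
    scaled = scheduler.scale_model_input(sample, t)
    print(scaled.abs().mean() < sample.abs().mean())  # tensor(True)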
@@ -210,13 +212,13 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
 
     def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
         """
-        Sets the timesteps used for the diffusion chain
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 
         Args:
             num_inference_steps (`int`):
-            device (`str` or `torch.device`, optional):
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
         self.num_inference_steps = num_inference_steps
 
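The `device` argument documented here is the usual way to keep the timestep tensor next to the model; a sketch:

    import torch
    from diffusers import EulerDiscreteScheduler

    scheduler = EulerDiscreteScheduler()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    scheduler.set_timesteps(30, device=device)
    print(len(scheduler.timesteps), scheduler.timesteps.device)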
@@ -317,26 +319,31 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[EulerDiscreteSchedulerOutput, Tuple]:
         """
-        Predict the sample
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
         process from the learned model outputs (most often the predicted noise).
 
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`float`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample
-            s_churn (`float`)
-            s_tmin (`float`)
-            s_tmax (`float`)
-            s_noise (`float
+                A current instance of a sample created by the diffusion process.
+            s_churn (`float`):
+            s_tmin (`float`):
+            s_tmax (`float`):
+            s_noise (`float`, defaults to 1.0):
+                Scaling factor for noise added to the sample.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or
+                tuple.
 
         Returns:
-            [`~schedulers.
+            [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is
+                returned, otherwise a tuple is returned where the first element is the sample tensor.
         """
 
         if (
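Put together, the documented `step()` arguments drive the standard denoising loop. A self-contained toy version, with a zero function standing in for the learned diffusion model:

    import torch
    from diffusers import EulerDiscreteScheduler

    scheduler = EulerDiscreteScheduler()
    scheduler.set_timesteps(15)
    generator = torch.Generator().manual_seed(0)

    sample = torch.randn(1, 4, 64, 64, generator=generator) * scheduler.init_noise_sigma
    for t in scheduler.timesteps:
        model_input = scheduler.scale_model_input(sample, t)
        model_output = torch.zeros_like(model_input)  # stand-in for unet(model_input, t).sample
        sample = scheduler.step(model_output, t, sample, generator=generator).prev_sample
    print(sample.shape)  # torch.Size([1, 4, 64, 64])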
diffusers/schedulers/scheduling_heun_discrete.py:

@@ -70,41 +70,41 @@ def betas_for_alpha_bar(
 
 class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
     """
-    k-diffusion implementation by Katherine Crowson:
-    https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L90
+    Scheduler with Heun steps for discrete beta schedules.
 
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
 
     Args:
-        num_train_timesteps (`int
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        beta_start (`float`, defaults to 0.0001):
+            The starting `beta` value of inference.
+        beta_end (`float`, defaults to 0.02):
+            The final `beta` value.
+        beta_schedule (`str`, defaults to `"linear"`):
+            The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
             `linear` or `scaled_linear`.
-        trained_betas (`np.ndarray`, optional):
-        prediction_type (`str`,
-            https://imagen.research.google/video/paper.pdf).
-        clip_sample (`bool`,
-        clip_sample_range (`float`,
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
+        prediction_type (`str`, defaults to `epsilon`, *optional*):
+            Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
+            `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
+            Video](https://imagen.research.google/video/paper.pdf) paper).
+        clip_sample (`bool`, defaults to `True`):
+            Clip the predicted sample for numerical stability.
+        clip_sample_range (`float`, defaults to 1.0):
+            The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
         use_karras_sigmas (`bool`, *optional*, defaults to `False`):
-            stable diffusion.
+            Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
+            the sigmas are determined according to a sequence of noise levels {σi}.
+        timestep_spacing (`str`, defaults to `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+        steps_offset (`int`, defaults to 0):
+            An offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
+            Diffusion.
     """
 
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
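Because Heun shares the Karras-compatible config surface described above, the usual way to try it is to rebuild it from an existing pipeline's scheduler config (the checkpoint id is illustrative):

    from diffusers import DiffusionPipeline, HeunDiscreteScheduler

    pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    pipe.scheduler = HeunDiscreteScheduler.from_config(pipe.scheduler.config)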
@@ -181,12 +181,18 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         timestep: Union[float, torch.FloatTensor],
     ) -> torch.FloatTensor:
         """
-        Args:
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.
+
+        Args:
+            sample (`torch.FloatTensor`):
+                The input sample.
+            timestep (`int`, *optional*):
+                The current timestep in the diffusion chain.
+
         Returns:
-            `torch.FloatTensor`:
+            `torch.FloatTensor`:
+                A scaled input sample.
         """
         step_index = self.index_for_timestep(timestep)
 
@@ -201,13 +207,13 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         num_train_timesteps: Optional[int] = None,
     ):
         """
-        Sets the timesteps used for the diffusion chain
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 
         Args:
             num_inference_steps (`int`):
-            device (`str` or `torch.device`, optional):
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
         self.num_inference_steps = num_inference_steps
 
@@ -312,17 +318,23 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
         """
-        Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
         process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`float`):
+                The current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor`):
+                A current instance of a sample created by the diffusion process.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
 
         Returns:
             [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
+                tuple is returned where the first element is the sample tensor.
         """
         step_index = self.index_for_timestep(timestep)
 
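Worth remembering when reading the `step()` docs: Heun is a second-order method, so each sampler step evaluates the model twice. That is visible in the duplicated timestep entries:

    from diffusers import HeunDiscreteScheduler

    scheduler = HeunDiscreteScheduler()
    scheduler.set_timesteps(10)
    print(len(scheduler.timesteps))  # 19, i.e. 2 * 10 - 1: interleaved correction steps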
diffusers/schedulers/scheduling_ipndm.py:

@@ -24,18 +24,16 @@ from .scheduling_utils import SchedulerMixin, SchedulerOutput
 
 class IPNDMScheduler(SchedulerMixin, ConfigMixin):
     """
-    Improved Pseudo
-    [library](https://github.com/crowsonkb/v-diffusion-pytorch/blob/987f8985e38208345c1959b0ea767a625831cc9b/diffusion/sampling.py#L296)
+    A fourth-order Improved Pseudo Linear Multistep scheduler.
 
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
-
-    For more details, see the original paper: https://arxiv.org/abs/2202.09778
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
 
     Args:
-        num_train_timesteps (`int
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
     """
 
     order = 1
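The slimmed-down argument list matches the constructor: training length plus an optional custom beta array. A construction-only sketch (the `trained_betas` values are arbitrary):

    import numpy as np
    from diffusers import IPNDMScheduler

    scheduler = IPNDMScheduler(num_train_timesteps=1000)
    custom = IPNDMScheduler(trained_betas=np.linspace(1e-4, 2e-2, 1000))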
@@ -60,11 +58,13 @@ class IPNDMScheduler(SchedulerMixin, ConfigMixin):
 
     def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
         """
-        Sets the discrete timesteps used for the diffusion chain
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 
         Args:
             num_inference_steps (`int`):
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
         self.num_inference_steps = num_inference_steps
         steps = torch.linspace(1, 0, num_inference_steps + 1)[:-1]
@@ -90,20 +90,23 @@ class IPNDMScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
         """
-        times to approximate the solution.
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
+        the linear multistep method. It performs one forward pass multiple times to approximate the solution.
 
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`int`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample
-            return_dict (`bool`):
+                A current instance of a sample created by the diffusion process.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
 
         Returns:
-            [`~scheduling_utils.SchedulerOutput`] or `tuple`:
+            [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
+                tuple is returned where the first element is the sample tensor.
         """
         if self.num_inference_steps is None:
             raise ValueError(
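The multistep behaviour described above (one forward pass per step, reusing a buffer of past model outputs) in a toy loop:

    import torch
    from diffusers import IPNDMScheduler

    scheduler = IPNDMScheduler()
    scheduler.set_timesteps(12)

    sample = torch.randn(1, 3, 32, 32)
    for t in scheduler.timesteps:
        model_output = torch.zeros_like(sample)  # stand-in for a real model call
        sample = scheduler.step(model_output, t, sample).prev_sample
        # scheduler.ets holds the recent model outputs used by the multistep update
    print(sample.shape)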
@@ -138,10 +141,12 @@ class IPNDMScheduler(SchedulerMixin, ConfigMixin):
         current timestep.
 
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.FloatTensor`):
+                The input sample.
 
         Returns:
-            `torch.FloatTensor`:
+            `torch.FloatTensor`:
+                A scaled input sample.
         """
         return sample
 
diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py:

@@ -71,36 +71,35 @@ def betas_for_alpha_bar(
 
 class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
     """
-    https://
+    KDPM2DiscreteScheduler with ancestral sampling is inspired by the DPMSolver2 and Algorithm 2 from the [Elucidating
+    the Design Space of Diffusion-Based Generative Models](https://huggingface.co/papers/2206.00364) paper.
 
-    [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
-    function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
 
     Args:
-        num_train_timesteps (`int
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        beta_start (`float`, defaults to 0.00085):
+            The starting `beta` value of inference.
+        beta_end (`float`, defaults to 0.012):
+            The final `beta` value.
+        beta_schedule (`str`, defaults to `"linear"`):
+            The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
             `linear` or `scaled_linear`.
-        trained_betas (`np.ndarray`, optional):
-            `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
-            stable diffusion.
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
+        prediction_type (`str`, defaults to `epsilon`, *optional*):
+            Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
+            `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
+            Video](https://imagen.research.google/video/paper.pdf) paper).
+        timestep_spacing (`str`, defaults to `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+        steps_offset (`int`, defaults to 0):
+            An offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
+            Diffusion.
     """
 
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
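Note the different documented defaults relative to the Euler/Heun schedulers above (`beta_start=0.00085`, `beta_end=0.012`), which match the values used for latent-diffusion checkpoints. A sketch:

    from diffusers import KDPM2AncestralDiscreteScheduler

    scheduler = KDPM2AncestralDiscreteScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",  # the schedule Stable Diffusion checkpoints ship with
    )
    print(scheduler.config)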
@@ -172,12 +171,18 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         timestep: Union[float, torch.FloatTensor],
     ) -> torch.FloatTensor:
         """
-        Args:
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.
+
+        Args:
+            sample (`torch.FloatTensor`):
+                The input sample.
+            timestep (`int`, *optional*):
+                The current timestep in the diffusion chain.
+
         Returns:
-            `torch.FloatTensor`:
+            `torch.FloatTensor`:
+                A scaled input sample.
         """
         step_index = self.index_for_timestep(timestep)
 
@@ -196,13 +201,13 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         num_train_timesteps: Optional[int] = None,
     ):
         """
-        Sets the timesteps used for the diffusion chain
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 
         Args:
             num_inference_steps (`int`):
-            device (`str` or `torch.device`, optional):
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
         self.num_inference_steps = num_inference_steps
 
@@ -307,17 +312,25 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
         """
-        Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
         process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`float`):
+                The current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor`):
+                A current instance of a sample created by the diffusion process.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
 
         Returns:
             [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_ddim.SchedulerOutput`] is returned, otherwise a
+                tuple is returned where the first element is the sample tensor.
         """
         step_index = self.index_for_timestep(timestep)
 
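Since ancestral sampling injects fresh noise at every step, the `generator` argument documented above is what makes runs reproducible. A toy loop, seeded:

    import torch
    from diffusers import KDPM2AncestralDiscreteScheduler

    scheduler = KDPM2AncestralDiscreteScheduler()
    scheduler.set_timesteps(10)

    generator = torch.Generator().manual_seed(42)
    sample = torch.randn(1, 4, 16, 16, generator=generator) * scheduler.init_noise_sigma
    for t in scheduler.timesteps:
        model_input = scheduler.scale_model_input(sample, t)
        model_output = torch.zeros_like(model_input)  # stand-in for a real UNet call
        sample = scheduler.step(model_output, t, sample, generator=generator).prev_sample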