diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- diffusers/__init__.py +3 -1
- diffusers/commands/fp16_safetensors.py +2 -7
- diffusers/configuration_utils.py +23 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/loaders.py +62 -64
- diffusers/models/__init__.py +1 -0
- diffusers/models/activations.py +2 -0
- diffusers/models/attention.py +45 -1
- diffusers/models/autoencoder_tiny.py +193 -0
- diffusers/models/controlnet.py +1 -1
- diffusers/models/embeddings.py +56 -0
- diffusers/models/lora.py +0 -6
- diffusers/models/modeling_flax_utils.py +28 -2
- diffusers/models/modeling_utils.py +33 -16
- diffusers/models/transformer_2d.py +26 -9
- diffusers/models/unet_1d.py +2 -2
- diffusers/models/unet_2d_blocks.py +106 -56
- diffusers/models/unet_2d_condition.py +20 -5
- diffusers/models/vae.py +106 -1
- diffusers/pipelines/__init__.py +1 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/auto_pipeline.py +33 -43
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/pipeline_flax_utils.py +41 -4
- diffusers/pipelines/pipeline_utils.py +60 -16
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
- diffusers/schedulers/scheduling_consistency_models.py +70 -57
- diffusers/schedulers/scheduling_ddim.py +76 -71
- diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
- diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
- diffusers/schedulers/scheduling_ddpm.py +68 -67
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
- diffusers/schedulers/scheduling_deis_multistep.py +93 -85
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
- diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
- diffusers/schedulers/scheduling_euler_discrete.py +63 -56
- diffusers/schedulers/scheduling_heun_discrete.py +57 -45
- diffusers/schedulers/scheduling_ipndm.py +27 -22
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
- diffusers/schedulers/scheduling_karras_ve.py +55 -45
- diffusers/schedulers/scheduling_lms_discrete.py +58 -52
- diffusers/schedulers/scheduling_pndm.py +77 -62
- diffusers/schedulers/scheduling_repaint.py +56 -38
- diffusers/schedulers/scheduling_sde_ve.py +62 -50
- diffusers/schedulers/scheduling_sde_vp.py +32 -11
- diffusers/schedulers/scheduling_unclip.py +3 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
- diffusers/schedulers/scheduling_utils.py +41 -35
- diffusers/schedulers/scheduling_utils_flax.py +8 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- diffusers/utils/hub_utils.py +105 -2
- diffusers/utils/import_utils.py +0 -4
- diffusers/utils/pil_utils.py +19 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
- diffusers/models/cross_attention.py +0 -94
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -29,11 +29,11 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
|
29
29
|
@dataclass
|
30
30
|
class CMStochasticIterativeSchedulerOutput(BaseOutput):
|
31
31
|
"""
|
32
|
-
Output class for the scheduler's step function
|
32
|
+
Output class for the scheduler's `step` function.
|
33
33
|
|
34
34
|
Args:
|
35
35
|
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
36
|
-
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
|
36
|
+
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
|
37
37
|
denoising loop.
|
38
38
|
"""
|
39
39
|
|
@@ -42,38 +42,32 @@ class CMStochasticIterativeSchedulerOutput(BaseOutput):
|
|
42
42
|
|
43
43
|
class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
|
44
44
|
"""
|
45
|
-
Multistep and onestep sampling for consistency models
|
46
|
-
paper [1].
|
45
|
+
Multistep and onestep sampling for consistency models.
|
47
46
|
|
48
|
-
|
49
|
-
|
50
|
-
Generative Models." https://arxiv.org/abs/2206.00364
|
51
|
-
|
52
|
-
[`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
|
53
|
-
function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
|
54
|
-
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
55
|
-
[`~SchedulerMixin.from_pretrained`] functions.
|
47
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
48
|
+
methods the library implements for all schedulers such as loading and saving.
|
56
49
|
|
57
50
|
Args:
|
58
|
-
num_train_timesteps (`int
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
51
|
+
num_train_timesteps (`int`, defaults to 40):
|
52
|
+
The number of diffusion steps to train the model.
|
53
|
+
sigma_min (`float`, defaults to 0.002):
|
54
|
+
Minimum noise magnitude in the sigma schedule. Defaults to 0.002 from the original implementation.
|
55
|
+
sigma_max (`float`, defaults to 80.0):
|
56
|
+
Maximum noise magnitude in the sigma schedule. Defaults to 80.0 from the original implementation.
|
57
|
+
sigma_data (`float`, defaults to 0.5):
|
58
|
+
The standard deviation of the data distribution from the EDM
|
59
|
+
[paper](https://huggingface.co/papers/2206.00364). Defaults to 0.5 from the original implementation.
|
60
|
+
s_noise (`float`, defaults to 1.0):
|
67
61
|
The amount of additional noise to counteract loss of detail during sampling. A reasonable range is [1.000,
|
68
|
-
1.011].
|
69
|
-
rho (`float
|
70
|
-
The
|
71
|
-
|
72
|
-
clip_denoised (`bool`):
|
73
|
-
Whether to clip the denoised outputs to `(-1, 1)`.
|
62
|
+
1.011]. Defaults to 1.0 from the original implementation.
|
63
|
+
rho (`float`, defaults to 7.0):
|
64
|
+
The parameter for calculating the Karras sigma schedule from the EDM
|
65
|
+
[paper](https://huggingface.co/papers/2206.00364). Defaults to 7.0 from the original implementation.
|
66
|
+
clip_denoised (`bool`, defaults to `True`):
|
67
|
+
Whether to clip the denoised outputs to `(-1, 1)`.
|
74
68
|
timesteps (`List` or `np.ndarray` or `torch.Tensor`, *optional*):
|
75
|
-
|
76
|
-
order.
|
69
|
+
An explicit timestep schedule that can be optionally specified. The timesteps are expected to be in
|
70
|
+
increasing order.
|
77
71
|
"""
|
78
72
|
|
79
73
|
order = 1
|
@@ -114,13 +108,17 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
|
|
114
108
|
self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
|
115
109
|
) -> torch.FloatTensor:
|
116
110
|
"""
|
117
|
-
Scales the consistency model input by `(sigma**2 + sigma_data**2) ** 0.5
|
111
|
+
Scales the consistency model input by `(sigma**2 + sigma_data**2) ** 0.5`.
|
118
112
|
|
119
113
|
Args:
|
120
|
-
sample (`torch.FloatTensor`):
|
121
|
-
|
114
|
+
sample (`torch.FloatTensor`):
|
115
|
+
The input sample.
|
116
|
+
timestep (`float` or `torch.FloatTensor`):
|
117
|
+
The current timestep in the diffusion chain.
|
118
|
+
|
122
119
|
Returns:
|
123
|
-
`torch.FloatTensor`:
|
120
|
+
`torch.FloatTensor`:
|
121
|
+
A scaled input sample.
|
124
122
|
"""
|
125
123
|
# Get sigma corresponding to timestep
|
126
124
|
if isinstance(timestep, torch.Tensor):
|
@@ -135,12 +133,15 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
|
|
135
133
|
|
136
134
|
def sigma_to_t(self, sigmas: Union[float, np.ndarray]):
|
137
135
|
"""
|
138
|
-
Gets scaled timesteps from the Karras sigmas
|
136
|
+
Gets scaled timesteps from the Karras sigmas for input to the consistency model.
|
139
137
|
|
140
138
|
Args:
|
141
|
-
sigmas (`float` or `np.ndarray`):
|
139
|
+
sigmas (`float` or `np.ndarray`):
|
140
|
+
A single Karras sigma or an array of Karras sigmas.
|
141
|
+
|
142
142
|
Returns:
|
143
|
-
`float` or `np.ndarray`:
|
143
|
+
`float` or `np.ndarray`:
|
144
|
+
A scaled input timestep or scaled input timestep array.
|
144
145
|
"""
|
145
146
|
if not isinstance(sigmas, np.ndarray):
|
146
147
|
sigmas = np.array(sigmas, dtype=np.float64)
|
@@ -156,17 +157,17 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
|
|
156
157
|
timesteps: Optional[List[int]] = None,
|
157
158
|
):
|
158
159
|
"""
|
159
|
-
Sets the timesteps used for the diffusion chain
|
160
|
+
Sets the timesteps used for the diffusion chain (to be run before inference).
|
160
161
|
|
161
162
|
Args:
|
162
163
|
num_inference_steps (`int`):
|
163
|
-
|
164
|
-
device (`str` or `torch.device`, optional):
|
165
|
-
|
166
|
-
timesteps (`List[int]`, optional):
|
167
|
-
|
168
|
-
timestep spacing strategy of equal spacing between timesteps is used. If passed,
|
169
|
-
must be `None`.
|
164
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
165
|
+
device (`str` or `torch.device`, *optional*):
|
166
|
+
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
|
167
|
+
timesteps (`List[int]`, *optional*):
|
168
|
+
Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
|
169
|
+
timestep spacing strategy of equal spacing between timesteps is used. If `timesteps` is passed,
|
170
|
+
`num_inference_steps` must be `None`.
|
170
171
|
"""
|
171
172
|
if num_inference_steps is None and timesteps is None:
|
172
173
|
raise ValueError("Exactly one of `num_inference_steps` or `timesteps` must be supplied.")
|
@@ -241,17 +242,22 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
|
|
241
242
|
|
242
243
|
def get_scalings_for_boundary_condition(self, sigma):
|
243
244
|
"""
|
244
|
-
Gets the scalings used in the consistency model parameterization
|
245
|
-
|
245
|
+
Gets the scalings used in the consistency model parameterization (from Appendix C of the
|
246
|
+
[paper](https://huggingface.co/papers/2303.01469)) to enforce boundary condition.
|
246
247
|
|
247
|
-
|
248
|
+
<Tip>
|
249
|
+
|
250
|
+
`epsilon` in the equations for `c_skip` and `c_out` is set to `sigma_min`.
|
251
|
+
|
252
|
+
</Tip>
|
248
253
|
|
249
254
|
Args:
|
250
255
|
sigma (`torch.FloatTensor`):
|
251
256
|
The current sigma in the Karras sigma schedule.
|
257
|
+
|
252
258
|
Returns:
|
253
259
|
`tuple`:
|
254
|
-
A two-element tuple where c_skip (which weights the current sample) is the first element and c_out
|
260
|
+
A two-element tuple where `c_skip` (which weights the current sample) is the first element and `c_out`
|
255
261
|
(which weights the consistency model output) is the second element.
|
256
262
|
"""
|
257
263
|
sigma_min = self.config.sigma_min
|
@@ -270,20 +276,27 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
|
|
270
276
|
return_dict: bool = True,
|
271
277
|
) -> Union[CMStochasticIterativeSchedulerOutput, Tuple]:
|
272
278
|
"""
|
273
|
-
Predict the sample
|
279
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
|
274
280
|
process from the learned model outputs (most often the predicted noise).
|
275
281
|
|
276
282
|
Args:
|
277
|
-
model_output (`torch.FloatTensor`):
|
278
|
-
|
283
|
+
model_output (`torch.FloatTensor`):
|
284
|
+
The direct output from the learned diffusion model.
|
285
|
+
timestep (`float`):
|
286
|
+
The current timestep in the diffusion chain.
|
279
287
|
sample (`torch.FloatTensor`):
|
280
|
-
current instance of sample
|
281
|
-
generator (`torch.Generator`, *optional*):
|
282
|
-
|
288
|
+
A current instance of a sample created by the diffusion process.
|
289
|
+
generator (`torch.Generator`, *optional*):
|
290
|
+
A random number generator.
|
291
|
+
return_dict (`bool`, *optional*, defaults to `True`):
|
292
|
+
Whether or not to return a
|
293
|
+
[`~schedulers.scheduling_consistency_models.CMStochasticIterativeSchedulerOutput`] or `tuple`.
|
294
|
+
|
283
295
|
Returns:
|
284
|
-
[`~schedulers.
|
285
|
-
|
286
|
-
|
296
|
+
[`~schedulers.scheduling_consistency_models.CMStochasticIterativeSchedulerOutput`] or `tuple`:
|
297
|
+
If return_dict is `True`,
|
298
|
+
[`~schedulers.scheduling_consistency_models.CMStochasticIterativeSchedulerOutput`] is returned,
|
299
|
+
otherwise a tuple is returned where the first element is the sample tensor.
|
287
300
|
"""
|
288
301
|
|
289
302
|
if (
|
@@ -31,14 +31,14 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
|
|
31
31
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM
|
32
32
|
class DDIMSchedulerOutput(BaseOutput):
|
33
33
|
"""
|
34
|
-
Output class for the scheduler's step function output.
|
34
|
+
Output class for the scheduler's `step` function output.
|
35
35
|
|
36
36
|
Args:
|
37
37
|
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
38
|
-
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
|
38
|
+
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
|
39
39
|
denoising loop.
|
40
40
|
pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
41
|
-
The predicted denoised sample (x_{0}) based on the model output from the current timestep.
|
41
|
+
The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
|
42
42
|
`pred_original_sample` can be used to preview progress or for guidance.
|
43
43
|
"""
|
44
44
|
|
@@ -129,57 +129,53 @@ def rescale_zero_terminal_snr(betas):
|
|
129
129
|
|
130
130
|
class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
131
131
|
"""
|
132
|
-
|
133
|
-
|
132
|
+
`DDIMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with
|
133
|
+
non-Markovian guidance.
|
134
134
|
|
135
|
-
|
136
|
-
|
137
|
-
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
138
|
-
[`~SchedulerMixin.from_pretrained`] functions.
|
139
|
-
|
140
|
-
For more details, see the original paper: https://arxiv.org/abs/2010.02502
|
135
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
136
|
+
methods the library implements for all schedulers such as loading and saving.
|
141
137
|
|
142
138
|
Args:
|
143
|
-
num_train_timesteps (`int
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
139
|
+
num_train_timesteps (`int`, defaults to 1000):
|
140
|
+
The number of diffusion steps to train the model.
|
141
|
+
beta_start (`float`, defaults to 0.0001):
|
142
|
+
The starting `beta` value of inference.
|
143
|
+
beta_end (`float`, defaults to 0.02):
|
144
|
+
The final `beta` value.
|
145
|
+
beta_schedule (`str`, defaults to `"linear"`):
|
146
|
+
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
|
148
147
|
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
|
149
|
-
trained_betas (`np.ndarray`, optional):
|
150
|
-
|
151
|
-
clip_sample (`bool`,
|
152
|
-
|
153
|
-
clip_sample_range (`float`,
|
154
|
-
|
155
|
-
set_alpha_to_one (`bool`,
|
156
|
-
|
157
|
-
|
158
|
-
otherwise it uses the value
|
159
|
-
steps_offset (`int`,
|
160
|
-
|
161
|
-
`set_alpha_to_one=False
|
162
|
-
|
163
|
-
prediction_type (`str`,
|
164
|
-
|
165
|
-
|
166
|
-
https://imagen.research.google/video/paper.pdf)
|
167
|
-
thresholding (`bool`,
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
the
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
whether to rescale the betas to have zero terminal SNR (proposed by https://arxiv.org/pdf/2305.08891.pdf).
|
181
|
-
This can enable the model to generate very bright and dark samples instead of limiting it to samples with
|
182
|
-
medium brightness. Loosely related to
|
148
|
+
trained_betas (`np.ndarray`, *optional*):
|
149
|
+
Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
|
150
|
+
clip_sample (`bool`, defaults to `True`):
|
151
|
+
Clip the predicted sample for numerical stability.
|
152
|
+
clip_sample_range (`float`, defaults to 1.0):
|
153
|
+
The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
|
154
|
+
set_alpha_to_one (`bool`, defaults to `True`):
|
155
|
+
Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
|
156
|
+
there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
|
157
|
+
otherwise it uses the alpha value at step 0.
|
158
|
+
steps_offset (`int`, defaults to 0):
|
159
|
+
An offset added to the inference steps. You can use a combination of `offset=1` and
|
160
|
+
`set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
|
161
|
+
Diffusion.
|
162
|
+
prediction_type (`str`, defaults to `epsilon`, *optional*):
|
163
|
+
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
|
164
|
+
`sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
|
165
|
+
Video](https://imagen.research.google/video/paper.pdf) paper).
|
166
|
+
thresholding (`bool`, defaults to `False`):
|
167
|
+
Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
|
168
|
+
as Stable Diffusion.
|
169
|
+
dynamic_thresholding_ratio (`float`, defaults to 0.995):
|
170
|
+
The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
|
171
|
+
sample_max_value (`float`, defaults to 1.0):
|
172
|
+
The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
|
173
|
+
timestep_spacing (`str`, defaults to `"leading"`):
|
174
|
+
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
175
|
+
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
176
|
+
rescale_betas_zero_snr (`bool`, defaults to `False`):
|
177
|
+
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
|
178
|
+
dark samples instead of limiting it to samples with medium brightness. Loosely related to
|
183
179
|
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
|
184
180
|
"""
|
185
181
|
|
@@ -246,11 +242,14 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
246
242
|
current timestep.
|
247
243
|
|
248
244
|
Args:
|
249
|
-
sample (`torch.FloatTensor`):
|
250
|
-
|
245
|
+
sample (`torch.FloatTensor`):
|
246
|
+
The input sample.
|
247
|
+
timestep (`int`, *optional*):
|
248
|
+
The current timestep in the diffusion chain.
|
251
249
|
|
252
250
|
Returns:
|
253
|
-
`torch.FloatTensor`:
|
251
|
+
`torch.FloatTensor`:
|
252
|
+
A scaled input sample.
|
254
253
|
"""
|
255
254
|
return sample
|
256
255
|
|
@@ -301,11 +300,11 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
301
300
|
|
302
301
|
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
303
302
|
"""
|
304
|
-
Sets the discrete timesteps used for the diffusion chain
|
303
|
+
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
305
304
|
|
306
305
|
Args:
|
307
306
|
num_inference_steps (`int`):
|
308
|
-
|
307
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
309
308
|
"""
|
310
309
|
|
311
310
|
if num_inference_steps > self.config.num_train_timesteps:
|
@@ -356,29 +355,35 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
356
355
|
return_dict: bool = True,
|
357
356
|
) -> Union[DDIMSchedulerOutput, Tuple]:
|
358
357
|
"""
|
359
|
-
Predict the sample
|
358
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
|
360
359
|
process from the learned model outputs (most often the predicted noise).
|
361
360
|
|
362
361
|
Args:
|
363
|
-
model_output (`torch.FloatTensor`):
|
364
|
-
|
362
|
+
model_output (`torch.FloatTensor`):
|
363
|
+
The direct output from the learned diffusion model.
|
364
|
+
timestep (`float`):
|
365
|
+
The current discrete timestep in the diffusion chain.
|
365
366
|
sample (`torch.FloatTensor`):
|
366
|
-
current instance of sample
|
367
|
-
eta (`float`):
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
367
|
+
A current instance of a sample created by the diffusion process.
|
368
|
+
eta (`float`):
|
369
|
+
The weight of the noise added in the diffusion step.
|
370
|
+
use_clipped_model_output (`bool`, defaults to `False`):
|
371
|
+
If `True`, computes "corrected" `model_output` from the clipped predicted original sample. Necessary
|
372
|
+
because predicted original sample is clipped to [-1, 1] when `self.config.clip_sample` is `True`. If no
|
373
|
+
clipping has happened, "corrected" `model_output` would coincide with the one provided as input and
|
374
|
+
`use_clipped_model_output` has no effect.
|
375
|
+
generator (`torch.Generator`, *optional*):
|
376
|
+
A random number generator.
|
377
|
+
variance_noise (`torch.FloatTensor`):
|
378
|
+
Alternative to generating noise with `generator` by directly providing the noise for the variance
|
379
|
+
itself. Useful for methods such as [`CycleDiffusion`].
|
380
|
+
return_dict (`bool`, *optional*, defaults to `True`):
|
381
|
+
Whether or not to return a [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`.
|
377
382
|
|
378
383
|
Returns:
|
379
384
|
[`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`:
|
380
|
-
|
381
|
-
|
385
|
+
If return_dict is `True`, [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] is returned, otherwise a
|
386
|
+
tuple is returned where the first element is the sample tensor.
|
382
387
|
|
383
388
|
"""
|
384
389
|
if self.num_inference_steps is None:
|
@@ -30,14 +30,14 @@ from diffusers.utils import BaseOutput, deprecate
|
|
30
30
|
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM
|
31
31
|
class DDIMSchedulerOutput(BaseOutput):
|
32
32
|
"""
|
33
|
-
Output class for the scheduler's step function output.
|
33
|
+
Output class for the scheduler's `step` function output.
|
34
34
|
|
35
35
|
Args:
|
36
36
|
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
37
|
-
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
|
37
|
+
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
|
38
38
|
denoising loop.
|
39
39
|
pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
|
40
|
-
The predicted denoised sample (x_{0}) based on the model output from the current timestep.
|
40
|
+
The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
|
41
41
|
`pred_original_sample` can be used to preview progress or for guidance.
|
42
42
|
"""
|
43
43
|
|
@@ -129,47 +129,45 @@ def rescale_zero_terminal_snr(betas):
|
|
129
129
|
|
130
130
|
class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
131
131
|
"""
|
132
|
-
DDIMInverseScheduler is the reverse scheduler of [`DDIMScheduler`].
|
132
|
+
`DDIMInverseScheduler` is the reverse scheduler of [`DDIMScheduler`].
|
133
133
|
|
134
|
-
|
135
|
-
|
136
|
-
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
137
|
-
[`~SchedulerMixin.from_pretrained`] functions.
|
138
|
-
|
139
|
-
For more details, see the original paper: https://arxiv.org/abs/2010.02502
|
134
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
135
|
+
methods the library implements for all schedulers such as loading and saving.
|
140
136
|
|
141
137
|
Args:
|
142
|
-
num_train_timesteps (`int
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
138
|
+
num_train_timesteps (`int`, defaults to 1000):
|
139
|
+
The number of diffusion steps to train the model.
|
140
|
+
beta_start (`float`, defaults to 0.0001):
|
141
|
+
The starting `beta` value of inference.
|
142
|
+
beta_end (`float`, defaults to 0.02):
|
143
|
+
The final `beta` value.
|
144
|
+
beta_schedule (`str`, defaults to `"linear"`):
|
145
|
+
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
|
147
146
|
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
|
148
|
-
trained_betas (`np.ndarray`, optional):
|
149
|
-
|
150
|
-
clip_sample (`bool`,
|
151
|
-
|
152
|
-
clip_sample_range (`float`,
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
steps_offset (`int`,
|
159
|
-
|
160
|
-
`
|
147
|
+
trained_betas (`np.ndarray`, *optional*):
|
148
|
+
Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
|
149
|
+
clip_sample (`bool`, defaults to `True`):
|
150
|
+
Clip the predicted sample for numerical stability.
|
151
|
+
clip_sample_range (`float`, defaults to 1.0):
|
152
|
+
The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
|
153
|
+
set_alpha_to_one (`bool`, defaults to `True`):
|
154
|
+
Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
|
155
|
+
there is no previous alpha. When this option is `True` the previous alpha product is fixed to 0, otherwise
|
156
|
+
it uses the alpha value at step `num_train_timesteps - 1`.
|
157
|
+
steps_offset (`int`, defaults to 0):
|
158
|
+
An offset added to the inference steps. You can use a combination of `offset=1` and
|
159
|
+
`set_alpha_to_one=False` to make the last step use `num_train_timesteps - 1` for the previous alpha
|
161
160
|
product.
|
162
|
-
prediction_type (`str`,
|
163
|
-
|
164
|
-
|
165
|
-
https://imagen.research.google/video/paper.pdf)
|
166
|
-
timestep_spacing (`str`,
|
167
|
-
The way the timesteps should be scaled. Refer to Table 2
|
168
|
-
Steps are Flawed](https://
|
169
|
-
rescale_betas_zero_snr (`bool`,
|
170
|
-
|
171
|
-
|
172
|
-
medium brightness. Loosely related to
|
161
|
+
prediction_type (`str`, defaults to `epsilon`, *optional*):
|
162
|
+
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
|
163
|
+
`sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
|
164
|
+
Video](https://imagen.research.google/video/paper.pdf) paper).
|
165
|
+
timestep_spacing (`str`, defaults to `"leading"`):
|
166
|
+
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
167
|
+
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
168
|
+
rescale_betas_zero_snr (`bool`, defaults to `False`):
|
169
|
+
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
|
170
|
+
dark samples instead of limiting it to samples with medium brightness. Loosely related to
|
173
171
|
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
|
174
172
|
"""
|
175
173
|
|
@@ -243,21 +241,24 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
243
241
|
current timestep.
|
244
242
|
|
245
243
|
Args:
|
246
|
-
sample (`torch.FloatTensor`):
|
247
|
-
|
244
|
+
sample (`torch.FloatTensor`):
|
245
|
+
The input sample.
|
246
|
+
timestep (`int`, *optional*):
|
247
|
+
The current timestep in the diffusion chain.
|
248
248
|
|
249
249
|
Returns:
|
250
|
-
`torch.FloatTensor`:
|
250
|
+
`torch.FloatTensor`:
|
251
|
+
A scaled input sample.
|
251
252
|
"""
|
252
253
|
return sample
|
253
254
|
|
254
255
|
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
255
256
|
"""
|
256
|
-
Sets the discrete timesteps used for the diffusion chain
|
257
|
+
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
257
258
|
|
258
259
|
Args:
|
259
260
|
num_inference_steps (`int`):
|
260
|
-
|
261
|
+
The number of diffusion steps used when generating samples with a pre-trained model.
|
261
262
|
"""
|
262
263
|
|
263
264
|
if num_inference_steps > self.config.num_train_timesteps:
|
@@ -302,6 +303,37 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
302
303
|
variance_noise: Optional[torch.FloatTensor] = None,
|
303
304
|
return_dict: bool = True,
|
304
305
|
) -> Union[DDIMSchedulerOutput, Tuple]:
|
306
|
+
"""
|
307
|
+
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
|
308
|
+
process from the learned model outputs (most often the predicted noise).
|
309
|
+
|
310
|
+
Args:
|
311
|
+
model_output (`torch.FloatTensor`):
|
312
|
+
The direct output from the learned diffusion model.
|
313
|
+
timestep (`float`):
|
314
|
+
The current discrete timestep in the diffusion chain.
|
315
|
+
sample (`torch.FloatTensor`):
|
316
|
+
A current instance of a sample created by the diffusion process.
|
317
|
+
eta (`float`):
|
318
|
+
The weight of the noise added in the diffusion step.
|
319
|
+
use_clipped_model_output (`bool`, defaults to `False`):
|
320
|
+
If `True`, computes "corrected" `model_output` from the clipped predicted original sample. Necessary
|
321
|
+
because predicted original sample is clipped to [-1, 1] when `self.config.clip_sample` is `True`. If no
|
322
|
+
clipping has happened, "corrected" `model_output` would coincide with the one provided as input and
|
323
|
+
`use_clipped_model_output` has no effect.
|
324
|
+
variance_noise (`torch.FloatTensor`):
|
325
|
+
Alternative to generating noise with `generator` by directly providing the noise for the variance
|
326
|
+
itself. Useful for methods such as [`CycleDiffusion`].
|
327
|
+
return_dict (`bool`, *optional*, defaults to `True`):
|
328
|
+
Whether or not to return a [`~schedulers.scheduling_ddim_inverse.DDIMSchedulerOutput`] or
|
329
|
+
`tuple`.
|
330
|
+
|
331
|
+
Returns:
|
332
|
+
[`~schedulers.scheduling_ddim_inverse.DDIMSchedulerOutput`] or `tuple`:
|
333
|
+
If return_dict is `True`, [`~schedulers.scheduling_ddim_inverse.DDIMSchedulerOutput`] is
|
334
|
+
returned, otherwise a tuple is returned where the first element is the sample tensor.
|
335
|
+
|
336
|
+
"""
|
305
337
|
# 1. get previous step value (=t+1)
|
306
338
|
prev_timestep = timestep + self.config.num_train_timesteps // self.num_inference_steps
|
307
339
|
|