diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Files changed (114):
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
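
Among the additions listed above are a brand-new `AutoencoderTiny` model (`diffusers/models/autoencoder_tiny.py`) and a GLIGEN pipeline. A minimal sketch for picking up the new release; the `madebyollin/taesd` checkpoint name is an assumption based on the TAESD model card, not something stated in this diff (upgrade first with `pip install --upgrade diffusers==0.20.1`):

```python
import diffusers
from diffusers import AutoencoderTiny  # new in 0.20.x (see autoencoder_tiny.py above)

print(diffusers.__version__)  # expect "0.20.1" after upgrading

# Checkpoint name is an assumption; any TAESD-style checkpoint should work.
vae = AutoencoderTiny.from_pretrained("madebyollin/taesd")
```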
diffusers/schedulers/scheduling_unipc_multistep.py:

```diff
@@ -56,78 +56,62 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):

 class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
     """
-    UniPC is a training-free framework designed for the fast sampling of diffusion models, which consists of a
-    corrector (UniC) and a predictor (UniP) that share a unified analytical form and support arbitrary orders. UniPC is
-    by desinged model-agnostic, supporting pixel-space/latent-space DPMs on unconditional/conditional sampling. It can
-    also be applied to both noise prediction model and data prediction model. The corrector UniC can be also applied
-    after any off-the-shelf solvers to increase the order of accuracy.
+    `UniPCMultistepScheduler` is a training-free framework designed for the fast sampling of diffusion models.

-    For more details, see the original paper: https://arxiv.org/abs/2302.04867
-
-    Currently, we support the multistep UniPC for both noise prediction models and data prediction models. We recommend
-    to use `solver_order=2` for guided sampling, and `solver_order=3` for unconditional sampling.
-
-    We also support the "dynamic thresholding" method in Imagen (https://arxiv.org/abs/2205.11487). For pixel-space
-    diffusion models, you can set both `predict_x0=True` and `thresholding=True` to use the dynamic thresholding. Note
-    that the thresholding method is unsuitable for latent-space diffusion models (such as stable-diffusion).
-
-    [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
-    function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.

     Args:
-        num_train_timesteps (`int`): number of diffusion steps used to train the model.
-        beta_start (`float`): the starting `beta` value of inference.
-        beta_end (`float`): the final `beta` value.
-        beta_schedule (`str`):
-            the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        beta_start (`float`, defaults to 0.0001):
+            The starting `beta` value of inference.
+        beta_end (`float`, defaults to 0.02):
+            The final `beta` value.
+        beta_schedule (`str`, defaults to `"linear"`):
+            The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
             `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
-        trained_betas (`np.ndarray`, optional):
-            option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
         solver_order (`int`, default `2`):
-            the order of UniPC, also the p in UniPC-p; can be any positive integer. Note that the effective order of
-            accuracy is `solver_order + 1` due to the UniC. We recommend to use `solver_order=2` for guided sampling,
-            and `solver_order=3` for unconditional sampling.
-        prediction_type (`str`, default `epsilon`, optional):
-            prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
-            process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
-            https://imagen.research.google/video/paper.pdf)
-        thresholding (`bool`, default `False`):
-            whether to use the "dynamic thresholding" method (introduced by Imagen, https://arxiv.org/abs/2205.11487).
-            For pixel-space diffusion models, you can set both `predict_x0=True` and `thresholding=True` to use the
-            dynamic thresholding. Note that the thresholding method is unsuitable for latent-space diffusion models
-            (such as stable-diffusion).
-        dynamic_thresholding_ratio (`float`, default `0.995`):
-            the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
-            (https://arxiv.org/abs/2205.11487).
-        sample_max_value (`float`, default `1.0`):
-            the threshold value for dynamic thresholding. Valid only when `thresholding=True` and `predict_x0=True`.
-        predict_x0 (`bool`, default `True`):
-            whether to use the updating algrithm on the predicted x0. See https://arxiv.org/abs/2211.01095 for details
+            The UniPC order which can be any positive integer. The effective order of accuracy is `solver_order + 1`
+            due to the UniC. It is recommended to use `solver_order=2` for guided sampling, and `solver_order=3` for
+            unconditional sampling.
+        prediction_type (`str`, defaults to `epsilon`, *optional*):
+            Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
+            `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
+            Video](https://imagen.research.google/video/paper.pdf) paper).
+        thresholding (`bool`, defaults to `False`):
+            Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
+            as Stable Diffusion.
+        dynamic_thresholding_ratio (`float`, defaults to 0.995):
+            The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
+        sample_max_value (`float`, defaults to 1.0):
+            The threshold value for dynamic thresholding. Valid only when `thresholding=True` and `predict_x0=True`.
+        predict_x0 (`bool`, defaults to `True`):
+            Whether to use the updating algorithm on the predicted x0.
         solver_type (`str`, default `bh2`):
-            the solver type of UniPC. We recommend use `bh1` for unconditional sampling when steps < 10, and use `bh2`
+            Solver type for UniPC. It is recommended to use `bh1` for unconditional sampling when steps < 10, and `bh2`
             otherwise.
         lower_order_final (`bool`, default `True`):
-            whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. We empirically
-            find this trick can stabilize the sampling of DPM-Solver for steps < 15, especially for steps <= 10.
+            Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can
+            stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10.
         disable_corrector (`list`, default `[]`):
-            decide which step to disable the corrector. For large guidance scale, the misalignment between the
-            `epsilon_theta(x_t, c)`and `epsilon_theta(x_t^c, c)` might influence the convergence. This can be mitigated
-            by disable the corrector at the first few steps (e.g., disable_corrector=[0])
+            Decides which step to disable the corrector to mitigate the misalignment between `epsilon_theta(x_t, c)`
+            and `epsilon_theta(x_t^c, c)` which can influence convergence for a large guidance scale. Corrector is
+            usually disabled during the first few steps.
         solver_p (`SchedulerMixin`, default `None`):
-            can be any other scheduler. If specified, the algorithm will become solver_p + UniC.
+            Any other scheduler that if specified, the algorithm becomes `solver_p + UniC`.
         use_karras_sigmas (`bool`, *optional*, defaults to `False`):
-            This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
-            noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
-            of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
-        timestep_spacing (`str`, default `"linspace"`):
-            The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
-            Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
-        steps_offset (`int`, default `0`):
-            an offset added to the inference steps. You can use a combination of `offset=1` and
-            `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
-            stable diffusion.
+            Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
+            the sigmas are determined according to a sequence of noise levels {σi}.
+        timestep_spacing (`str`, defaults to `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+        steps_offset (`int`, defaults to 0):
+            An offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
+            Diffusion.
     """

     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
```
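
The rewritten docstring defers loading and saving to `SchedulerMixin`/`ConfigMixin`. As a usage sketch (the model id, prompt, and settings below are illustrative, not taken from this diff), the scheduler is typically swapped into a pipeline by rebuilding it from the pipeline's existing config:

```python
import torch
from diffusers import DiffusionPipeline, UniPCMultistepScheduler

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# solver_order=2 is the recommended value for guided sampling per the docstring above.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, solver_order=2)

image = pipe("an astronaut riding a horse", num_inference_steps=20).images[0]
```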
```diff
@@ -200,13 +184,13 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):

     def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
         """
-        Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).

         Args:
             num_inference_steps (`int`):
-                the number of diffusion steps used when generating samples with a pre-trained model.
-            device (`str` or `torch.device`, optional):
-                the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
         # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
         if self.config.timestep_spacing == "linspace":
```
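
A quick sketch of what `set_timesteps` produces; the exact values depend on the configured `timestep_spacing`:

```python
from diffusers import UniPCMultistepScheduler

scheduler = UniPCMultistepScheduler()  # num_train_timesteps defaults to 1000
scheduler.set_timesteps(num_inference_steps=20, device="cpu")

print(len(scheduler.timesteps))  # 20
print(scheduler.timesteps[:3])   # descending timesteps, spaced per the config
```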
```diff
@@ -294,20 +278,61 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):

         return sample

+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
+    def _sigma_to_t(self, sigma, log_sigmas):
+        # get log sigma
+        log_sigma = np.log(sigma)
+
+        # get distribution
+        dists = log_sigma - log_sigmas[:, np.newaxis]
+
+        # get sigmas range
+        low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2)
+        high_idx = low_idx + 1
+
+        low = log_sigmas[low_idx]
+        high = log_sigmas[high_idx]
+
+        # interpolate sigmas
+        w = (low - log_sigma) / (low - high)
+        w = np.clip(w, 0, 1)
+
+        # transform interpolation to time range
+        t = (1 - w) * low_idx + w * high_idx
+        t = t.reshape(sigma.shape)
+        return t
+
+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
+    def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
+        """Constructs the noise schedule of Karras et al. (2022)."""
+
+        sigma_min: float = in_sigmas[-1].item()
+        sigma_max: float = in_sigmas[0].item()
+
+        rho = 7.0  # 7.0 is the value used in the paper
+        ramp = np.linspace(0, 1, num_inference_steps)
+        min_inv_rho = sigma_min ** (1 / rho)
+        max_inv_rho = sigma_max ** (1 / rho)
+        sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
+        return sigmas
+
     def convert_model_output(
         self, model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor
     ) -> torch.FloatTensor:
         r"""
-        Convert the model output to the corresponding type that the algorithm PC needs.
+        Convert the model output to the corresponding type the UniPC algorithm needs.

         Args:
-            model_output (`torch.FloatTensor`): direct output from learned diffusion model.
-            timestep (`int`): current discrete timestep in the diffusion chain.
+            model_output (`torch.FloatTensor`):
+                The direct output from the learned diffusion model.
+            timestep (`int`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
+                A current instance of a sample created by the diffusion process.

         Returns:
-            `torch.FloatTensor`: the converted model output.
+            `torch.FloatTensor`:
+                The converted model output.
         """
         if self.predict_x0:
             if self.config.prediction_type == "epsilon":
```
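
The new `_convert_to_karras` helper implements Equation (5) of Karras et al. (2022): interpolate linearly in `sigma**(1/rho)` space with `rho = 7`, then raise back to the `rho`-th power. A standalone sketch of the same computation (the function name and the endpoint values are illustrative):

```python
import numpy as np

def karras_sigmas(sigma_min: float, sigma_max: float, n: int, rho: float = 7.0) -> np.ndarray:
    # Linear ramp in sigma**(1/rho) space between sigma_max and sigma_min.
    ramp = np.linspace(0, 1, n)
    min_inv_rho = sigma_min ** (1 / rho)
    max_inv_rho = sigma_max ** (1 / rho)
    return (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho

sigmas = karras_sigmas(0.03, 14.6, 10)
print(sigmas[0], sigmas[-1])  # endpoints recover sigma_max and sigma_min exactly
```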
```diff
@@ -357,14 +382,17 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):

         Args:
             model_output (`torch.FloatTensor`):
-                direct outputs from learned diffusion model at the current timestep.
-            prev_timestep (`int`): previous discrete timestep in the diffusion chain.
+                The direct output from the learned diffusion model at the current timestep.
+            prev_timestep (`int`):
+                The previous discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
-            order (`int`): the order of UniP at this step, also the p in UniPC-p.
+                A current instance of a sample created by the diffusion process.
+            order (`int`):
+                The order of UniP at this timestep (corresponds to the *p* in UniPC-p).

         Returns:
-            `torch.FloatTensor`: the sample tensor at the previous timestep.
+            `torch.FloatTensor`:
+                The sample tensor at the previous timestep.
         """
         timestep_list = self.timestep_list
         model_output_list = self.model_outputs
@@ -462,15 +490,20 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
         One step for the UniC (B(h) version).

         Args:
-            this_model_output (`torch.FloatTensor`): the model outputs at `x_t`
-            this_timestep (`int`): the current timestep `t`
-            last_sample (`torch.FloatTensor`): the generated sample before the last predictor: `x_{t-1}`
-            this_sample (`torch.FloatTensor`): the generated sample after the last predictor: `x_{t}`
-            order (`int`): the `p` of UniC-p at this step. Note that the effective order of accuracy
-                should be order + 1
+            this_model_output (`torch.FloatTensor`):
+                The model outputs at `x_t`.
+            this_timestep (`int`):
+                The current timestep `t`.
+            last_sample (`torch.FloatTensor`):
+                The generated sample before the last predictor `x_{t-1}`.
+            this_sample (`torch.FloatTensor`):
+                The generated sample after the last predictor `x_{t}`.
+            order (`int`):
+                The `p` of UniC-p at this step. The effective order of accuracy should be `order + 1`.

         Returns:
-            `torch.FloatTensor`: the corrected sample tensor at the current timestep.
+            `torch.FloatTensor`:
+                The corrected sample tensor at the current timestep.
         """
         timestep_list = self.timestep_list
         model_output_list = self.model_outputs
@@ -564,18 +597,23 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
         """
-        Step function propagating the sample with the multistep UniPC.
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with
+        the multistep UniPC.

         Args:
-            model_output (`torch.FloatTensor`): direct output from learned diffusion model.
-            timestep (`int`): current discrete timestep in the diffusion chain.
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`int`):
+                The current discrete timestep in the diffusion chain.
             sample (`torch.FloatTensor`):
-                current instance of sample being created by diffusion process.
-            return_dict (`bool`): option for returning tuple rather than SchedulerOutput class
+                A current instance of a sample created by the diffusion process.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`.

         Returns:
-            [`~scheduling_utils.SchedulerOutput`] or `tuple`: [`~scheduling_utils.SchedulerOutput`] if `return_dict` is
-            True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor.
+            [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
+                tuple is returned where the first element is the sample tensor.

         """
```
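
`step` is what a sampling loop calls once per timestep. A runnable sketch of that loop with a stand-in for a real UNet (the zero-noise "model" exists only to exercise the API):

```python
import torch
from diffusers import UniPCMultistepScheduler

scheduler = UniPCMultistepScheduler()
scheduler.set_timesteps(num_inference_steps=20)

def fake_unet(sample, t):
    # Stand-in for a real denoiser; always predicts zero noise.
    return torch.zeros_like(sample)

latents = torch.randn(1, 4, 64, 64)
for t in scheduler.timesteps:
    model_input = scheduler.scale_model_input(latents, t)
    noise_pred = fake_unet(model_input, t)
    # step() returns a SchedulerOutput; prev_sample feeds the next iteration.
    latents = scheduler.step(noise_pred, t, latents).prev_sample
```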
 
```diff
@@ -646,10 +684,12 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
         current timestep.

         Args:
-            sample (`torch.FloatTensor`): input sample
+            sample (`torch.FloatTensor`):
+                The input sample.

         Returns:
-            `torch.FloatTensor`: scaled input sample
+            `torch.FloatTensor`:
+                A scaled input sample.
         """
         return sample
```
diffusers/schedulers/scheduling_utils.py:

```diff
@@ -19,7 +19,7 @@ from typing import Any, Dict, Optional, Union

 import torch

-from ..utils import BaseOutput
+from ..utils import BaseOutput, PushToHubMixin


 SCHEDULER_CONFIG_NAME = "scheduler_config.json"
@@ -49,24 +49,30 @@ class KarrasDiffusionSchedulers(Enum):
 @dataclass
 class SchedulerOutput(BaseOutput):
     """
-    Base class for the scheduler's step function output.
+    Base class for the output of a scheduler's `step` function.

     Args:
         prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
-            Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
     """

     prev_sample: torch.FloatTensor


-class SchedulerMixin:
+class SchedulerMixin(PushToHubMixin):
     """
-    Mixin containing common functions for the schedulers.
+    Base class for all schedulers.
+
+    [`SchedulerMixin`] contains common functions shared by all schedulers such as general loading and saving
+    functionalities.
+
+    [`ConfigMixin`] takes care of storing the configuration attributes (like `num_train_timesteps`) that are passed to
+    the scheduler's `__init__` function, and the attributes can be accessed by `scheduler.config.num_train_timesteps`.

     Class attributes:
-        - **_compatibles** (`List[str]`) -- A list of classes that are compatible with the parent class, so that
-          `from_config` can be used from a class different than the one used to save the config (should be overridden
+        - **_compatibles** (`List[str]`) -- A list of scheduler classes that are compatible with the parent scheduler
+          class. Use [`~ConfigMixin.from_config`] to load a different compatible scheduler class (should be overridden
           by parent class).
     """

@@ -83,56 +89,50 @@ class SchedulerMixin:
         **kwargs,
     ):
         r"""
-        Instantiate a Scheduler class from a pre-defined JSON configuration file inside a directory or Hub repo.
+        Instantiate a scheduler from a pre-defined JSON configuration file in a local directory or Hub repository.

         Parameters:
             pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*):
                 Can be either:

-                    - A string, the *model id* of a model repo on huggingface.co. Valid model ids should have an
-                      organization name, like `google/ddpm-celebahq-256`.
-                    - A path to a *directory* containing the schedluer configurations saved using
-                      [`~SchedulerMixin.save_pretrained`], e.g., `./my_model_directory/`.
+                    - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
+                      the Hub.
+                    - A path to a *directory* (for example `./my_model_directory`) containing the scheduler
+                      configuration saved with [`~SchedulerMixin.save_pretrained`].
             subfolder (`str`, *optional*):
-                In case the relevant files are located inside a subfolder of the model repo (either remote in
-                huggingface.co or downloaded locally), you can specify the folder name here.
+                The subfolder location of a model file within a larger model repository on the Hub or locally.
             return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                 Whether kwargs that are not consumed by the Python class should be returned or not.
             cache_dir (`Union[str, os.PathLike]`, *optional*):
-                Path to a directory in which a downloaded pretrained model configuration should be cached if the
-                standard cache should not be used.
+                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
+                is not used.
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
             resume_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to delete incompletely received files. Will attempt to resume the download if such a
-                file exists.
+                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
+                incompletely downloaded files are deleted.
             proxies (`Dict[str, str]`, *optional*):
-                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
+                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
             output_loading_info(`bool`, *optional*, defaults to `False`):
                 Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.
             local_files_only(`bool`, *optional*, defaults to `False`):
-                Whether or not to only look at local files (i.e., do not try to download the model).
+                Whether to only load local model weights and configuration files or not. If set to `True`, the model
+                won't be downloaded from the Hub.
             use_auth_token (`str` or *bool*, *optional*):
-                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
-                when running `transformers-cli login` (stored in `~/.huggingface`).
+                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
+                `diffusers-cli login` (stored in `~/.huggingface`) is used.
             revision (`str`, *optional*, defaults to `"main"`):
-                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
-                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
-                identifier allowed by git.
-
-        <Tip>
-
-        It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated
-        models](https://huggingface.co/docs/hub/models-gated#gated-models).
-
-        </Tip>
+                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
+                allowed by Git.

         <Tip>

-        Activate the special ["offline-mode"](https://huggingface.co/transformers/installation.html#offline-mode) to
-        use this method in a firewalled environment.
+        To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with
+        `huggingface-cli login`. You can also activate the special
+        ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use this method in a
+        firewalled environment.

         </Tip>

@@ -148,12 +148,18 @@ class SchedulerMixin:

     def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs):
         """
-        Save a scheduler configuration object to the directory `save_directory`, so that it can be re-loaded using the
+        Save a scheduler configuration object to a directory so that it can be reloaded using the
         [`~SchedulerMixin.from_pretrained`] class method.

         Args:
             save_directory (`str` or `os.PathLike`):
                 Directory where the configuration JSON file will be saved (will be created if it does not exist).
+            push_to_hub (`bool`, *optional*, defaults to `False`):
+                Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the
+                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
+                namespace).
+            kwargs (`Dict[str, Any]`, *optional*):
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         self.save_config(save_directory=save_directory, push_to_hub=push_to_hub, **kwargs)
```
 
diffusers/schedulers/scheduling_utils_flax.py:

```diff
@@ -21,7 +21,7 @@ from typing import Any, Dict, Optional, Tuple, Union
 import flax
 import jax.numpy as jnp

-from ..utils import BaseOutput
+from ..utils import BaseOutput, PushToHubMixin


 SCHEDULER_CONFIG_NAME = "scheduler_config.json"
@@ -53,7 +53,7 @@ class FlaxSchedulerOutput(BaseOutput):
     prev_sample: jnp.ndarray


-class FlaxSchedulerMixin:
+class FlaxSchedulerMixin(PushToHubMixin):
     """
     Mixin containing common functions for the schedulers.

@@ -156,6 +156,12 @@ class FlaxSchedulerMixin:
         Args:
             save_directory (`str` or `os.PathLike`):
                 Directory where the configuration JSON file will be saved (will be created if it does not exist).
+            push_to_hub (`bool`, *optional*, defaults to `False`):
+                Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the
+                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
+                namespace).
+            kwargs (`Dict[str, Any]`, *optional*):
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         self.save_config(save_directory=save_directory, push_to_hub=push_to_hub, **kwargs)
```