diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. diffusers/__init__.py +26 -1
  2. diffusers/configuration_utils.py +34 -29
  3. diffusers/dependency_versions_table.py +4 -0
  4. diffusers/image_processor.py +125 -12
  5. diffusers/loaders.py +169 -203
  6. diffusers/models/attention.py +24 -1
  7. diffusers/models/attention_flax.py +10 -5
  8. diffusers/models/attention_processor.py +3 -0
  9. diffusers/models/autoencoder_kl.py +114 -33
  10. diffusers/models/controlnet.py +131 -14
  11. diffusers/models/controlnet_flax.py +37 -26
  12. diffusers/models/cross_attention.py +17 -17
  13. diffusers/models/embeddings.py +67 -0
  14. diffusers/models/modeling_flax_utils.py +64 -56
  15. diffusers/models/modeling_utils.py +193 -104
  16. diffusers/models/prior_transformer.py +207 -37
  17. diffusers/models/resnet.py +26 -26
  18. diffusers/models/transformer_2d.py +36 -41
  19. diffusers/models/transformer_temporal.py +24 -21
  20. diffusers/models/unet_1d.py +31 -25
  21. diffusers/models/unet_2d.py +43 -30
  22. diffusers/models/unet_2d_blocks.py +210 -89
  23. diffusers/models/unet_2d_blocks_flax.py +12 -12
  24. diffusers/models/unet_2d_condition.py +172 -64
  25. diffusers/models/unet_2d_condition_flax.py +38 -24
  26. diffusers/models/unet_3d_blocks.py +34 -31
  27. diffusers/models/unet_3d_condition.py +101 -34
  28. diffusers/models/vae.py +5 -5
  29. diffusers/models/vae_flax.py +37 -34
  30. diffusers/models/vq_model.py +23 -14
  31. diffusers/pipelines/__init__.py +24 -1
  32. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
  33. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
  34. diffusers/pipelines/consistency_models/__init__.py +1 -0
  35. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
  36. diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
  37. diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
  38. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
  39. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
  40. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  41. diffusers/pipelines/kandinsky/__init__.py +1 -1
  42. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
  43. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
  44. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
  45. diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
  46. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
  47. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
  48. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
  49. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
  50. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
  51. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
  52. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
  53. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  54. diffusers/pipelines/pipeline_utils.py +124 -146
  55. diffusers/pipelines/shap_e/__init__.py +27 -0
  56. diffusers/pipelines/shap_e/camera.py +147 -0
  57. diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
  58. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
  59. diffusers/pipelines/shap_e/renderer.py +709 -0
  60. diffusers/pipelines/stable_diffusion/__init__.py +2 -0
  61. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
  65. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  66. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
  67. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  68. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
  69. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
  70. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
  71. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
  72. diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
  73. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
  74. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
  75. diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
  76. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
  77. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
  78. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
  79. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
  80. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  81. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
  82. diffusers/schedulers/__init__.py +3 -0
  83. diffusers/schedulers/scheduling_consistency_models.py +380 -0
  84. diffusers/schedulers/scheduling_ddim.py +28 -6
  85. diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
  86. diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
  87. diffusers/schedulers/scheduling_ddpm.py +53 -7
  88. diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
  89. diffusers/schedulers/scheduling_deis_multistep.py +66 -11
  90. diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
  91. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
  92. diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
  93. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
  94. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
  95. diffusers/schedulers/scheduling_euler_discrete.py +58 -8
  96. diffusers/schedulers/scheduling_heun_discrete.py +89 -14
  97. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
  98. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
  99. diffusers/schedulers/scheduling_lms_discrete.py +57 -8
  100. diffusers/schedulers/scheduling_pndm.py +46 -10
  101. diffusers/schedulers/scheduling_repaint.py +19 -4
  102. diffusers/schedulers/scheduling_sde_ve.py +5 -1
  103. diffusers/schedulers/scheduling_unclip.py +43 -4
  104. diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
  105. diffusers/training_utils.py +1 -1
  106. diffusers/utils/__init__.py +2 -1
  107. diffusers/utils/dummy_pt_objects.py +60 -0
  108. diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
  109. diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
  110. diffusers/utils/hub_utils.py +1 -1
  111. diffusers/utils/import_utils.py +20 -3
  112. diffusers/utils/logging.py +15 -18
  113. diffusers/utils/outputs.py +3 -3
  114. diffusers/utils/testing_utils.py +15 -0
  115. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
  116. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
  117. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
  118. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
  119. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
  120. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
@@ -45,7 +45,11 @@ class LMSDiscreteSchedulerOutput(BaseOutput):
45
45
 
46
46
 
47
47
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
48
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
48
+ def betas_for_alpha_bar(
49
+ num_diffusion_timesteps,
50
+ max_beta=0.999,
51
+ alpha_transform_type="cosine",
52
+ ):
49
53
  """
50
54
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
51
55
  (1-beta) over time from t = [0,1].
@@ -58,19 +62,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
58
62
  num_diffusion_timesteps (`int`): the number of betas to produce.
59
63
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
60
64
  prevent singularities.
65
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
66
+ Choose from `cosine` or `exp`
61
67
 
62
68
  Returns:
63
69
  betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
64
70
  """
71
+ if alpha_transform_type == "cosine":
65
72
 
66
- def alpha_bar(time_step):
67
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
73
+ def alpha_bar_fn(t):
74
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
75
+
76
+ elif alpha_transform_type == "exp":
77
+
78
+ def alpha_bar_fn(t):
79
+ return math.exp(t * -12.0)
80
+
81
+ else:
82
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
68
83
 
69
84
  betas = []
70
85
  for i in range(num_diffusion_timesteps):
71
86
  t1 = i / num_diffusion_timesteps
72
87
  t2 = (i + 1) / num_diffusion_timesteps
73
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
88
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
74
89
  return torch.tensor(betas, dtype=torch.float32)
75
90
 
76
91
 
@@ -102,6 +117,13 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
102
117
  prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
103
118
  process), `sample` (directly predicting the noisy sample) or `v_prediction` (see section 2.4
104
119
  https://imagen.research.google/video/paper.pdf)
120
+ timestep_spacing (`str`, default `"linspace"`):
121
+ The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
122
+ Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
123
+ steps_offset (`int`, default `0`):
124
+ an offset added to the inference steps. You can use a combination of `steps_offset=1` and
125
+ `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
126
+ stable diffusion.
105
127
  """
106
128
 
107
129
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -117,6 +139,8 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
117
139
  trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
118
140
  use_karras_sigmas: Optional[bool] = False,
119
141
  prediction_type: str = "epsilon",
142
+ timestep_spacing: str = "linspace",
143
+ steps_offset: int = 0,
120
144
  ):
121
145
  if trained_betas is not None:
122
146
  self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -140,9 +164,6 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
140
164
  sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
141
165
  self.sigmas = torch.from_numpy(sigmas)
142
166
 
143
- # standard deviation of the initial noise distribution
144
- self.init_noise_sigma = self.sigmas.max()
145
-
146
167
  # setable values
147
168
  self.num_inference_steps = None
148
169
  self.use_karras_sigmas = use_karras_sigmas
@@ -150,6 +171,14 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
150
171
  self.derivatives = []
151
172
  self.is_scale_input_called = False
152
173
 
174
+ @property
175
+ def init_noise_sigma(self):
176
+ # standard deviation of the initial noise distribution
177
+ if self.config.timestep_spacing in ["linspace", "trailing"]:
178
+ return self.sigmas.max()
179
+
180
+ return (self.sigmas.max() ** 2 + 1) ** 0.5
181
+
153
182
  def scale_model_input(
154
183
  self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
155
184
  ) -> torch.FloatTensor:
@@ -205,7 +234,27 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
205
234
  """
206
235
  self.num_inference_steps = num_inference_steps
207
236
 
208
- timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
237
+ # "linspace", "leading", "trailing" correspond to the annotations in Table 2 of https://arxiv.org/abs/2305.08891
238
+ if self.config.timestep_spacing == "linspace":
239
+ timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[
240
+ ::-1
241
+ ].copy()
242
+ elif self.config.timestep_spacing == "leading":
243
+ step_ratio = self.config.num_train_timesteps // self.num_inference_steps
244
+ # creates integer timesteps by multiplying by ratio
245
+ # casting to int to avoid issues when num_inference_step is power of 3
246
+ timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float)
247
+ timesteps += self.config.steps_offset
248
+ elif self.config.timestep_spacing == "trailing":
249
+ step_ratio = self.config.num_train_timesteps / self.num_inference_steps
250
+ # creates integer timesteps by multiplying by ratio
251
+ # casting to int to avoid issues when num_inference_step is power of 3
252
+ timesteps = (np.arange(self.config.num_train_timesteps, 0, -step_ratio)).round().copy().astype(float)
253
+ timesteps -= 1
254
+ else:
255
+ raise ValueError(
256
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
257
+ )
209
258
 
210
259
  sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
211
260
  log_sigmas = np.log(sigmas)
@@ -25,7 +25,11 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
25
25
 
26
26
 
27
27
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
28
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
28
+ def betas_for_alpha_bar(
29
+ num_diffusion_timesteps,
30
+ max_beta=0.999,
31
+ alpha_transform_type="cosine",
32
+ ):
29
33
  """
30
34
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
31
35
  (1-beta) over time from t = [0,1].
@@ -38,19 +42,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
38
42
  num_diffusion_timesteps (`int`): the number of betas to produce.
39
43
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
40
44
  prevent singularities.
45
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
46
+ Choose from `cosine` or `exp`
41
47
 
42
48
  Returns:
43
49
  betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
44
50
  """
51
+ if alpha_transform_type == "cosine":
45
52
 
46
- def alpha_bar(time_step):
47
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
53
+ def alpha_bar_fn(t):
54
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
55
+
56
+ elif alpha_transform_type == "exp":
57
+
58
+ def alpha_bar_fn(t):
59
+ return math.exp(t * -12.0)
60
+
61
+ else:
62
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
48
63
 
49
64
  betas = []
50
65
  for i in range(num_diffusion_timesteps):
51
66
  t1 = i / num_diffusion_timesteps
52
67
  t2 = (i + 1) / num_diffusion_timesteps
53
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
68
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
54
69
  return torch.tensor(betas, dtype=torch.float32)
55
70
 
56
71
 
@@ -85,11 +100,13 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
85
100
  prediction_type (`str`, default `epsilon`, optional):
86
101
  prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion process)
87
102
  or `v_prediction` (see section 2.4 https://imagen.research.google/video/paper.pdf)
103
+ timestep_spacing (`str`, default `"leading"`):
104
+ The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
105
+ Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
88
106
  steps_offset (`int`, default `0`):
89
107
  an offset added to the inference steps. You can use a combination of `steps_offset=1` and
90
108
  `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
91
109
  stable diffusion.
92
-
93
110
  """
94
111
 
95
112
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -106,6 +123,7 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
106
123
  skip_prk_steps: bool = False,
107
124
  set_alpha_to_one: bool = False,
108
125
  prediction_type: str = "epsilon",
126
+ timestep_spacing: str = "leading",
109
127
  steps_offset: int = 0,
110
128
  ):
111
129
  if trained_betas is not None:
@@ -159,11 +177,29 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
159
177
  """
160
178
 
161
179
  self.num_inference_steps = num_inference_steps
162
- step_ratio = self.config.num_train_timesteps // self.num_inference_steps
163
- # creates integer timesteps by multiplying by ratio
164
- # casting to int to avoid issues when num_inference_step is power of 3
165
- self._timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()
166
- self._timesteps += self.config.steps_offset
180
+ # "linspace", "leading", "trailing" correspond to the annotations in Table 2 of https://arxiv.org/abs/2305.08891
181
+ if self.config.timestep_spacing == "linspace":
182
+ self._timesteps = (
183
+ np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps).round().astype(np.int64)
184
+ )
185
+ elif self.config.timestep_spacing == "leading":
186
+ step_ratio = self.config.num_train_timesteps // self.num_inference_steps
187
+ # creates integer timesteps by multiplying by ratio
188
+ # casting to int to avoid issues when num_inference_step is power of 3
189
+ self._timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()
190
+ self._timesteps += self.config.steps_offset
191
+ elif self.config.timestep_spacing == "trailing":
192
+ step_ratio = self.config.num_train_timesteps / self.num_inference_steps
193
+ # creates integer timesteps by multiplying by ratio
194
+ # casting to int to avoid issues when num_inference_step is power of 3
195
+ self._timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio))[::-1].astype(
196
+ np.int64
197
+ )
198
+ self._timesteps -= 1
199
+ else:
200
+ raise ValueError(
201
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
202
+ )
167
203
 
168
204
  if self.config.skip_prk_steps:
169
205
  # for some models like stable diffusion the prk steps can/should be skipped to
@@ -43,7 +43,11 @@ class RePaintSchedulerOutput(BaseOutput):
43
43
 
44
44
 
45
45
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
46
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
46
+ def betas_for_alpha_bar(
47
+ num_diffusion_timesteps,
48
+ max_beta=0.999,
49
+ alpha_transform_type="cosine",
50
+ ):
47
51
  """
48
52
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
49
53
  (1-beta) over time from t = [0,1].
@@ -56,19 +60,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
56
60
  num_diffusion_timesteps (`int`): the number of betas to produce.
57
61
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
58
62
  prevent singularities.
63
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
64
+ Choose from `cosine` or `exp`
59
65
 
60
66
  Returns:
61
67
  betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
62
68
  """
69
+ if alpha_transform_type == "cosine":
63
70
 
64
- def alpha_bar(time_step):
65
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
71
+ def alpha_bar_fn(t):
72
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
73
+
74
+ elif alpha_transform_type == "exp":
75
+
76
+ def alpha_bar_fn(t):
77
+ return math.exp(t * -12.0)
78
+
79
+ else:
80
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
66
81
 
67
82
  betas = []
68
83
  for i in range(num_diffusion_timesteps):
69
84
  t1 = i / num_diffusion_timesteps
70
85
  t2 = (i + 1) / num_diffusion_timesteps
71
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
86
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
72
87
  return torch.tensor(betas, dtype=torch.float32)
73
88
 
74
89
 
@@ -276,7 +276,11 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
276
276
  # Make sure sigmas and timesteps have the same device and dtype as original_samples
277
277
  timesteps = timesteps.to(original_samples.device)
278
278
  sigmas = self.discrete_sigmas.to(original_samples.device)[timesteps]
279
- noise = torch.randn_like(original_samples) * sigmas[:, None, None, None]
279
+ noise = (
280
+ noise * sigmas[:, None, None, None]
281
+ if noise is not None
282
+ else torch.randn_like(original_samples) * sigmas[:, None, None, None]
283
+ )
280
284
  noisy_samples = noise + original_samples
281
285
  return noisy_samples
282
286
 
@@ -44,7 +44,11 @@ class UnCLIPSchedulerOutput(BaseOutput):
44
44
 
45
45
 
46
46
  # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
47
- def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
47
+ def betas_for_alpha_bar(
48
+ num_diffusion_timesteps,
49
+ max_beta=0.999,
50
+ alpha_transform_type="cosine",
51
+ ):
48
52
  """
49
53
  Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
50
54
  (1-beta) over time from t = [0,1].
@@ -57,19 +61,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
57
61
  num_diffusion_timesteps (`int`): the number of betas to produce.
58
62
  max_beta (`float`): the maximum beta to use; use values lower than 1 to
59
63
  prevent singularities.
64
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
65
+ Choose from `cosine` or `exp`
60
66
 
61
67
  Returns:
62
68
  betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
63
69
  """
70
+ if alpha_transform_type == "cosine":
64
71
 
65
- def alpha_bar(time_step):
66
- return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
72
+ def alpha_bar_fn(t):
73
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
74
+
75
+ elif alpha_transform_type == "exp":
76
+
77
+ def alpha_bar_fn(t):
78
+ return math.exp(t * -12.0)
79
+
80
+ else:
81
+ raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
67
82
 
68
83
  betas = []
69
84
  for i in range(num_diffusion_timesteps):
70
85
  t1 = i / num_diffusion_timesteps
71
86
  t2 = (i + 1) / num_diffusion_timesteps
72
- betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
87
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
73
88
  return torch.tensor(betas, dtype=torch.float32)
74
89
 
75
90
 
@@ -307,3 +322,27 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
307
322
  return (pred_prev_sample,)
308
323
 
309
324
  return UnCLIPSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample)
325
+
326
+ # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
327
+ def add_noise(
328
+ self,
329
+ original_samples: torch.FloatTensor,
330
+ noise: torch.FloatTensor,
331
+ timesteps: torch.IntTensor,
332
+ ) -> torch.FloatTensor:
333
+ # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
334
+ alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
335
+ timesteps = timesteps.to(original_samples.device)
336
+
337
+ sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
338
+ sqrt_alpha_prod = sqrt_alpha_prod.flatten()
339
+ while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
340
+ sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
341
+
342
+ sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
343
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
344
+ while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
345
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
346
+
347
+ noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
348
+ return noisy_samples
@@ -117,6 +117,17 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
117
117
  by disabling the corrector at the first few steps (e.g., disable_corrector=[0])
118
118
  solver_p (`SchedulerMixin`, default `None`):
119
119
  can be any other scheduler. If specified, the algorithm will become solver_p + UniC.
120
+ use_karras_sigmas (`bool`, *optional*, defaults to `False`):
121
+ This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
122
+ noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
123
+ of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
124
+ timestep_spacing (`str`, default `"linspace"`):
125
+ The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
126
+ Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
127
+ steps_offset (`int`, default `0`):
128
+ an offset added to the inference steps. You can use a combination of `steps_offset=1` and
129
+ `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
130
+ stable diffusion.
120
131
  """
121
132
 
122
133
  _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -140,6 +151,9 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
140
151
  lower_order_final: bool = True,
141
152
  disable_corrector: List[int] = [],
142
153
  solver_p: SchedulerMixin = None,
154
+ use_karras_sigmas: Optional[bool] = False,
155
+ timestep_spacing: str = "linspace",
156
+ steps_offset: int = 0,
143
157
  ):
144
158
  if trained_betas is not None:
145
159
  self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -168,7 +182,7 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
168
182
 
169
183
  if solver_type not in ["bh1", "bh2"]:
170
184
  if solver_type in ["midpoint", "heun", "logrho"]:
171
- self.register_to_config(solver_type="bh1")
185
+ self.register_to_config(solver_type="bh2")
172
186
  else:
173
187
  raise NotImplementedError(f"{solver_type} does is not implemented for {self.__class__}")
174
188
 
@@ -194,12 +208,39 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
194
208
  device (`str` or `torch.device`, optional):
195
209
  the device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
196
210
  """
197
- timesteps = (
198
- np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
199
- .round()[::-1][:-1]
200
- .copy()
201
- .astype(np.int64)
202
- )
211
+ # "linspace", "leading", "trailing" correspond to the annotations in Table 2 of https://arxiv.org/abs/2305.08891
212
+ if self.config.timestep_spacing == "linspace":
213
+ timesteps = (
214
+ np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
215
+ .round()[::-1][:-1]
216
+ .copy()
217
+ .astype(np.int64)
218
+ )
219
+ elif self.config.timestep_spacing == "leading":
220
+ step_ratio = self.config.num_train_timesteps // (num_inference_steps + 1)
221
+ # creates integer timesteps by multiplying by ratio
222
+ # casting to int to avoid issues when num_inference_step is power of 3
223
+ timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64)
224
+ timesteps += self.config.steps_offset
225
+ elif self.config.timestep_spacing == "trailing":
226
+ step_ratio = self.config.num_train_timesteps / num_inference_steps
227
+ # creates integer timesteps by multiplying by ratio
228
+ # casting to int to avoid issues when num_inference_step is power of 3
229
+ timesteps = np.arange(self.config.num_train_timesteps, 0, -step_ratio).round().copy().astype(np.int64)
230
+ timesteps -= 1
231
+ else:
232
+ raise ValueError(
233
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
234
+ )
235
+
236
+ sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
237
+ if self.config.use_karras_sigmas:
238
+ log_sigmas = np.log(sigmas)
239
+ sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
240
+ timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
241
+ timesteps = np.flip(timesteps).copy().astype(np.int64)
242
+
243
+ self.sigmas = torch.from_numpy(sigmas)
203
244
 
204
245
  # when num_inference_steps == num_train_timesteps, we can end up with
205
246
  # duplicates in timesteps.
@@ -1,6 +1,6 @@
1
1
  import contextlib
2
2
  import copy
3
- from random import random
3
+ import random
4
4
  from typing import Any, Dict, Iterable, Optional, Union
5
5
 
6
6
  import numpy as np
@@ -58,6 +58,7 @@ from .import_utils import (
58
58
  is_flax_available,
59
59
  is_ftfy_available,
60
60
  is_inflect_available,
61
+ is_invisible_watermark_available,
61
62
  is_k_diffusion_available,
62
63
  is_k_diffusion_version,
63
64
  is_librosa_available,
@@ -103,7 +104,7 @@ if is_torch_available():
103
104
  )
104
105
  from .torch_utils import maybe_allow_in_graph
105
106
 
106
- from .testing_utils import export_to_video
107
+ from .testing_utils import export_to_gif, export_to_video
107
108
 
108
109
 
109
110
  logger = get_logger(__name__)
@@ -210,6 +210,21 @@ class AudioPipelineOutput(metaclass=DummyObject):
210
210
  requires_backends(cls, ["torch"])
211
211
 
212
212
 
213
+ class ConsistencyModelPipeline(metaclass=DummyObject):
214
+ _backends = ["torch"]
215
+
216
+ def __init__(self, *args, **kwargs):
217
+ requires_backends(self, ["torch"])
218
+
219
+ @classmethod
220
+ def from_config(cls, *args, **kwargs):
221
+ requires_backends(cls, ["torch"])
222
+
223
+ @classmethod
224
+ def from_pretrained(cls, *args, **kwargs):
225
+ requires_backends(cls, ["torch"])
226
+
227
+
213
228
  class DanceDiffusionPipeline(metaclass=DummyObject):
214
229
  _backends = ["torch"]
215
230
 
@@ -390,6 +405,21 @@ class ScoreSdeVePipeline(metaclass=DummyObject):
390
405
  requires_backends(cls, ["torch"])
391
406
 
392
407
 
408
+ class CMStochasticIterativeScheduler(metaclass=DummyObject):
409
+ _backends = ["torch"]
410
+
411
+ def __init__(self, *args, **kwargs):
412
+ requires_backends(self, ["torch"])
413
+
414
+ @classmethod
415
+ def from_config(cls, *args, **kwargs):
416
+ requires_backends(cls, ["torch"])
417
+
418
+ @classmethod
419
+ def from_pretrained(cls, *args, **kwargs):
420
+ requires_backends(cls, ["torch"])
421
+
422
+
393
423
  class DDIMInverseScheduler(metaclass=DummyObject):
394
424
  _backends = ["torch"]
395
425
 
@@ -405,6 +435,21 @@ class DDIMInverseScheduler(metaclass=DummyObject):
405
435
  requires_backends(cls, ["torch"])
406
436
 
407
437
 
438
+ class DDIMParallelScheduler(metaclass=DummyObject):
439
+ _backends = ["torch"]
440
+
441
+ def __init__(self, *args, **kwargs):
442
+ requires_backends(self, ["torch"])
443
+
444
+ @classmethod
445
+ def from_config(cls, *args, **kwargs):
446
+ requires_backends(cls, ["torch"])
447
+
448
+ @classmethod
449
+ def from_pretrained(cls, *args, **kwargs):
450
+ requires_backends(cls, ["torch"])
451
+
452
+
408
453
  class DDIMScheduler(metaclass=DummyObject):
409
454
  _backends = ["torch"]
410
455
 
@@ -420,6 +465,21 @@ class DDIMScheduler(metaclass=DummyObject):
420
465
  requires_backends(cls, ["torch"])
421
466
 
422
467
 
468
+ class DDPMParallelScheduler(metaclass=DummyObject):
469
+ _backends = ["torch"]
470
+
471
+ def __init__(self, *args, **kwargs):
472
+ requires_backends(self, ["torch"])
473
+
474
+ @classmethod
475
+ def from_config(cls, *args, **kwargs):
476
+ requires_backends(cls, ["torch"])
477
+
478
+ @classmethod
479
+ def from_pretrained(cls, *args, **kwargs):
480
+ requires_backends(cls, ["torch"])
481
+
482
+
423
483
  class DDPMScheduler(metaclass=DummyObject):
424
484
  _backends = ["torch"]
425
485
 
@@ -0,0 +1,32 @@
1
+ # This file is autogenerated by the command `make fix-copies`, do not edit.
2
+ from ..utils import DummyObject, requires_backends
3
+
4
+
5
+ class StableDiffusionXLImg2ImgPipeline(metaclass=DummyObject):
6
+ _backends = ["torch", "transformers", "invisible_watermark"]
7
+
8
+ def __init__(self, *args, **kwargs):
9
+ requires_backends(self, ["torch", "transformers", "invisible_watermark"])
10
+
11
+ @classmethod
12
+ def from_config(cls, *args, **kwargs):
13
+ requires_backends(cls, ["torch", "transformers", "invisible_watermark"])
14
+
15
+ @classmethod
16
+ def from_pretrained(cls, *args, **kwargs):
17
+ requires_backends(cls, ["torch", "transformers", "invisible_watermark"])
18
+
19
+
20
+ class StableDiffusionXLPipeline(metaclass=DummyObject):
21
+ _backends = ["torch", "transformers", "invisible_watermark"]
22
+
23
+ def __init__(self, *args, **kwargs):
24
+ requires_backends(self, ["torch", "transformers", "invisible_watermark"])
25
+
26
+ @classmethod
27
+ def from_config(cls, *args, **kwargs):
28
+ requires_backends(cls, ["torch", "transformers", "invisible_watermark"])
29
+
30
+ @classmethod
31
+ def from_pretrained(cls, *args, **kwargs):
32
+ requires_backends(cls, ["torch", "transformers", "invisible_watermark"])