diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (238) hide show
  1. diffusers/__init__.py +26 -2
  2. diffusers/commands/fp16_safetensors.py +10 -11
  3. diffusers/configuration_utils.py +13 -8
  4. diffusers/dependency_versions_check.py +0 -1
  5. diffusers/dependency_versions_table.py +5 -5
  6. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  7. diffusers/image_processor.py +463 -51
  8. diffusers/loaders/__init__.py +82 -0
  9. diffusers/loaders/ip_adapter.py +159 -0
  10. diffusers/loaders/lora.py +1553 -0
  11. diffusers/loaders/lora_conversion_utils.py +284 -0
  12. diffusers/loaders/single_file.py +637 -0
  13. diffusers/loaders/textual_inversion.py +455 -0
  14. diffusers/loaders/unet.py +828 -0
  15. diffusers/loaders/utils.py +59 -0
  16. diffusers/models/__init__.py +26 -9
  17. diffusers/models/activations.py +9 -6
  18. diffusers/models/attention.py +301 -29
  19. diffusers/models/attention_flax.py +9 -1
  20. diffusers/models/attention_processor.py +378 -6
  21. diffusers/models/autoencoders/__init__.py +5 -0
  22. diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +17 -12
  23. diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +47 -23
  24. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +402 -0
  25. diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +24 -28
  26. diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +51 -44
  27. diffusers/models/{vae.py → autoencoders/vae.py} +71 -17
  28. diffusers/models/controlnet.py +59 -39
  29. diffusers/models/controlnet_flax.py +19 -18
  30. diffusers/models/downsampling.py +338 -0
  31. diffusers/models/embeddings.py +112 -29
  32. diffusers/models/embeddings_flax.py +2 -0
  33. diffusers/models/lora.py +131 -1
  34. diffusers/models/modeling_flax_utils.py +14 -8
  35. diffusers/models/modeling_outputs.py +17 -0
  36. diffusers/models/modeling_utils.py +37 -29
  37. diffusers/models/normalization.py +110 -4
  38. diffusers/models/resnet.py +299 -652
  39. diffusers/models/transformer_2d.py +22 -5
  40. diffusers/models/transformer_temporal.py +183 -1
  41. diffusers/models/unet_2d_blocks_flax.py +5 -0
  42. diffusers/models/unet_2d_condition.py +46 -0
  43. diffusers/models/unet_2d_condition_flax.py +13 -13
  44. diffusers/models/unet_3d_blocks.py +957 -173
  45. diffusers/models/unet_3d_condition.py +16 -8
  46. diffusers/models/unet_kandinsky3.py +535 -0
  47. diffusers/models/unet_motion_model.py +48 -33
  48. diffusers/models/unet_spatio_temporal_condition.py +489 -0
  49. diffusers/models/upsampling.py +454 -0
  50. diffusers/models/uvit_2d.py +471 -0
  51. diffusers/models/vae_flax.py +7 -0
  52. diffusers/models/vq_model.py +12 -3
  53. diffusers/optimization.py +16 -9
  54. diffusers/pipelines/__init__.py +137 -76
  55. diffusers/pipelines/amused/__init__.py +62 -0
  56. diffusers/pipelines/amused/pipeline_amused.py +328 -0
  57. diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
  58. diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
  59. diffusers/pipelines/animatediff/pipeline_animatediff.py +66 -8
  60. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -0
  61. diffusers/pipelines/auto_pipeline.py +23 -13
  62. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -0
  63. diffusers/pipelines/controlnet/pipeline_controlnet.py +238 -35
  64. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +148 -37
  65. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +155 -41
  66. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +123 -43
  67. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +216 -39
  68. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +106 -34
  69. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +1 -0
  70. diffusers/pipelines/ddim/pipeline_ddim.py +1 -0
  71. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -0
  72. diffusers/pipelines/deepfloyd_if/pipeline_if.py +13 -1
  73. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +13 -1
  74. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +13 -1
  75. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +13 -1
  76. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +13 -1
  77. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +13 -1
  78. diffusers/pipelines/deprecated/__init__.py +153 -0
  79. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
  80. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +177 -34
  81. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +182 -37
  82. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
  83. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
  84. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
  85. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
  86. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
  87. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
  88. diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
  89. diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
  90. diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
  91. diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
  92. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
  93. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +5 -4
  94. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
  95. diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
  96. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
  97. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
  98. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +8 -7
  99. diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
  100. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +34 -13
  101. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +7 -6
  102. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +12 -11
  103. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +17 -11
  104. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +11 -10
  105. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +14 -13
  106. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
  107. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
  108. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
  109. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +83 -51
  110. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
  111. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +7 -6
  112. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +7 -6
  113. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +7 -6
  114. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
  115. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
  116. diffusers/pipelines/dit/pipeline_dit.py +1 -0
  117. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +1 -1
  118. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +3 -3
  119. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  120. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +1 -1
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +1 -1
  122. diffusers/pipelines/kandinsky3/__init__.py +49 -0
  123. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
  124. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +589 -0
  125. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +654 -0
  126. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +111 -11
  127. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +102 -9
  128. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -0
  129. diffusers/pipelines/musicldm/pipeline_musicldm.py +1 -1
  130. diffusers/pipelines/onnx_utils.py +8 -5
  131. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +7 -2
  132. diffusers/pipelines/pipeline_flax_utils.py +11 -8
  133. diffusers/pipelines/pipeline_utils.py +63 -42
  134. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +247 -38
  135. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
  136. diffusers/pipelines/stable_diffusion/__init__.py +37 -65
  137. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +75 -78
  138. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +2 -2
  139. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +2 -4
  140. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -0
  141. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +174 -11
  142. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +8 -3
  143. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +1 -0
  144. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +178 -11
  145. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +224 -13
  146. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +74 -20
  147. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -0
  148. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +7 -0
  149. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
  150. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
  151. diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
  152. diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +6 -2
  153. diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
  154. diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +3 -3
  155. diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
  156. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +3 -2
  157. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +4 -3
  158. diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
  159. diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +7 -1
  160. diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
  161. diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +51 -7
  162. diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
  163. diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +57 -8
  164. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
  165. diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
  166. diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +68 -10
  167. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +194 -17
  168. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +205 -16
  169. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +206 -17
  170. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +23 -17
  171. diffusers/pipelines/stable_video_diffusion/__init__.py +58 -0
  172. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +652 -0
  173. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +108 -12
  174. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +115 -14
  175. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -0
  176. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +6 -0
  177. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +23 -3
  178. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +334 -10
  179. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +1331 -0
  180. diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
  181. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
  182. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  183. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +14 -4
  184. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +9 -5
  185. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +1 -1
  186. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +2 -2
  187. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -1
  188. diffusers/schedulers/__init__.py +4 -4
  189. diffusers/schedulers/deprecated/__init__.py +50 -0
  190. diffusers/schedulers/{scheduling_karras_ve.py → deprecated/scheduling_karras_ve.py} +4 -4
  191. diffusers/schedulers/{scheduling_sde_vp.py → deprecated/scheduling_sde_vp.py} +4 -6
  192. diffusers/schedulers/scheduling_amused.py +162 -0
  193. diffusers/schedulers/scheduling_consistency_models.py +2 -0
  194. diffusers/schedulers/scheduling_ddim.py +1 -3
  195. diffusers/schedulers/scheduling_ddim_inverse.py +2 -7
  196. diffusers/schedulers/scheduling_ddim_parallel.py +1 -3
  197. diffusers/schedulers/scheduling_ddpm.py +47 -3
  198. diffusers/schedulers/scheduling_ddpm_parallel.py +47 -3
  199. diffusers/schedulers/scheduling_deis_multistep.py +28 -6
  200. diffusers/schedulers/scheduling_dpmsolver_multistep.py +28 -6
  201. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +28 -6
  202. diffusers/schedulers/scheduling_dpmsolver_sde.py +3 -3
  203. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +28 -6
  204. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +59 -3
  205. diffusers/schedulers/scheduling_euler_discrete.py +102 -16
  206. diffusers/schedulers/scheduling_heun_discrete.py +17 -5
  207. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +17 -5
  208. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +17 -5
  209. diffusers/schedulers/scheduling_lcm.py +123 -29
  210. diffusers/schedulers/scheduling_lms_discrete.py +3 -3
  211. diffusers/schedulers/scheduling_pndm.py +1 -3
  212. diffusers/schedulers/scheduling_repaint.py +1 -3
  213. diffusers/schedulers/scheduling_unipc_multistep.py +28 -6
  214. diffusers/schedulers/scheduling_utils.py +3 -1
  215. diffusers/schedulers/scheduling_utils_flax.py +3 -1
  216. diffusers/training_utils.py +1 -1
  217. diffusers/utils/__init__.py +1 -2
  218. diffusers/utils/constants.py +10 -12
  219. diffusers/utils/dummy_pt_objects.py +75 -0
  220. diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
  221. diffusers/utils/dynamic_modules_utils.py +18 -22
  222. diffusers/utils/export_utils.py +8 -3
  223. diffusers/utils/hub_utils.py +24 -36
  224. diffusers/utils/logging.py +11 -11
  225. diffusers/utils/outputs.py +5 -5
  226. diffusers/utils/peft_utils.py +88 -44
  227. diffusers/utils/state_dict_utils.py +8 -0
  228. diffusers/utils/testing_utils.py +199 -1
  229. diffusers/utils/torch_utils.py +4 -4
  230. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/METADATA +86 -69
  231. diffusers-0.25.0.dist-info/RECORD +360 -0
  232. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
  233. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
  234. diffusers/loaders.py +0 -3336
  235. diffusers-0.23.1.dist-info/RECORD +0 -323
  236. /diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
  237. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
  238. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,471 @@
1
+ # coding=utf-8
2
+ # Copyright 2023 The HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from typing import Dict, Union
17
+
18
+ import torch
19
+ import torch.nn.functional as F
20
+ from torch import nn
21
+ from torch.utils.checkpoint import checkpoint
22
+
23
+ from ..configuration_utils import ConfigMixin, register_to_config
24
+ from .attention import BasicTransformerBlock, SkipFFTransformerBlock
25
+ from .attention_processor import (
26
+ ADDED_KV_ATTENTION_PROCESSORS,
27
+ CROSS_ATTENTION_PROCESSORS,
28
+ AttentionProcessor,
29
+ AttnAddedKVProcessor,
30
+ AttnProcessor,
31
+ )
32
+ from .embeddings import TimestepEmbedding, get_timestep_embedding
33
+ from .modeling_utils import ModelMixin
34
+ from .normalization import GlobalResponseNorm, RMSNorm
35
+ from .resnet import Downsample2D, Upsample2D
36
+
37
+
38
class UVit2DModel(ModelMixin, ConfigMixin):
    """
    Token-grid model built from a convolutional down block, a stack of transformer layers and a convolutional up
    block, ending in a per-position classifier over the codebook.

    The forward pass embeds a grid of token ids, runs `down_block`, flattens the spatial grid into a sequence for
    `transformer_layers`, restores the grid, runs `up_block` and finally maps every position to `codebook_size`
    logits with `mlm_layer`.

    Parameters:
        hidden_size (`int`): Width of the transformer layers and of the conditioning embedding.
        use_bias (`bool`): Whether linear / convolution / attention layers built here carry a bias term.
        hidden_dropout (`float`): Dropout used in the transformer layers and in the ConvNext residual blocks.
        cond_embed_dim (`int`): Feature size of `pooled_text_emb` before the micro-conditioning is appended.
        micro_cond_encode_dim (`int`): Sinusoidal embedding size used for every scalar in `micro_conds`.
        micro_cond_embed_dim (`int`): Total feature size of the flattened micro-conditioning embeddings.
        encoder_hidden_size (`int`): Feature size of the incoming `encoder_hidden_states`.
        vocab_size (`int`): Size of the input token embedding table.
        codebook_size (`int`): Number of output classes per position.
        in_channels (`int`): Token embedding width (also the hidden width of the output head).
        block_out_channels (`int`): Channel count of the convolutional down / up blocks.
        num_res_blocks (`int`): Number of (ConvNext, attention) pairs in each of the down / up blocks.
        downsample (`bool`): Whether the down block starts with a `Downsample2D`.
        upsample (`bool`): Whether the up block ends with an `Upsample2D`.
        block_num_heads (`int`): Attention heads inside the down / up blocks.
        num_hidden_layers (`int`): Number of transformer layers.
        num_attention_heads (`int`): Attention heads per transformer layer.
        attention_dropout (`float`): Attention dropout inside the down / up blocks.
        intermediate_size (`int`): Inner width of the transformer feed-forward.
        layer_norm_eps (`float`): Epsilon of every normalization layer built here.
        ln_elementwise_affine (`bool`): Whether normalization layers have learnable affine parameters.
        sample_size (`int`): Stored in the config; not read anywhere in this class.
    """

    _supports_gradient_checkpointing = True

    @register_to_config
    def __init__(
        self,
        # global config
        hidden_size: int = 1024,
        use_bias: bool = False,
        hidden_dropout: float = 0.0,
        # conditioning dimensions
        cond_embed_dim: int = 768,
        micro_cond_encode_dim: int = 256,
        micro_cond_embed_dim: int = 1280,
        encoder_hidden_size: int = 768,
        # num tokens
        vocab_size: int = 8256,  # codebook_size + 1 (for the mask token) rounded
        codebook_size: int = 8192,
        # `UVit2DConvEmbed`
        in_channels: int = 768,
        block_out_channels: int = 768,
        num_res_blocks: int = 3,
        downsample: bool = False,
        upsample: bool = False,
        block_num_heads: int = 12,
        # `TransformerLayer`
        num_hidden_layers: int = 22,
        num_attention_heads: int = 16,
        # `Attention`
        attention_dropout: float = 0.0,
        # `FeedForward`
        intermediate_size: int = 2816,
        # `Norm`
        layer_norm_eps: float = 1e-6,
        ln_elementwise_affine: bool = True,
        sample_size: int = 64,
    ):
        super().__init__()

        # Project the text-encoder sequence to the transformer width and normalize it.
        self.encoder_proj = nn.Linear(encoder_hidden_size, hidden_size, bias=use_bias)
        self.encoder_proj_layer_norm = RMSNorm(hidden_size, layer_norm_eps, ln_elementwise_affine)

        self.embed = UVit2DConvEmbed(
            in_channels, block_out_channels, vocab_size, ln_elementwise_affine, layer_norm_eps, use_bias
        )

        # Maps the concatenated (pooled text, micro-conditioning) vector to `hidden_size`.
        self.cond_embed = TimestepEmbedding(
            micro_cond_embed_dim + cond_embed_dim, hidden_size, sample_proj_bias=use_bias
        )

        self.down_block = UVitBlock(
            block_out_channels,
            num_res_blocks,
            hidden_size,
            hidden_dropout,
            ln_elementwise_affine,
            layer_norm_eps,
            use_bias,
            block_num_heads,
            attention_dropout,
            downsample,
            False,  # the down block never upsamples
        )

        self.project_to_hidden_norm = RMSNorm(block_out_channels, layer_norm_eps, ln_elementwise_affine)
        self.project_to_hidden = nn.Linear(block_out_channels, hidden_size, bias=use_bias)

        self.transformer_layers = nn.ModuleList(
            [
                BasicTransformerBlock(
                    dim=hidden_size,
                    num_attention_heads=num_attention_heads,
                    attention_head_dim=hidden_size // num_attention_heads,
                    dropout=hidden_dropout,
                    cross_attention_dim=hidden_size,
                    attention_bias=use_bias,
                    norm_type="ada_norm_continuous",
                    ada_norm_continous_conditioning_embedding_dim=hidden_size,
                    norm_elementwise_affine=ln_elementwise_affine,
                    norm_eps=layer_norm_eps,
                    ada_norm_bias=use_bias,
                    ff_inner_dim=intermediate_size,
                    ff_bias=use_bias,
                    attention_out_bias=use_bias,
                )
                for _ in range(num_hidden_layers)
            ]
        )

        self.project_from_hidden_norm = RMSNorm(hidden_size, layer_norm_eps, ln_elementwise_affine)
        self.project_from_hidden = nn.Linear(hidden_size, block_out_channels, bias=use_bias)

        self.up_block = UVitBlock(
            block_out_channels,
            num_res_blocks,
            hidden_size,
            hidden_dropout,
            ln_elementwise_affine,
            layer_norm_eps,
            use_bias,
            block_num_heads,
            attention_dropout,
            downsample=False,
            upsample=upsample,
        )

        self.mlm_layer = ConvMlmLayer(
            block_out_channels, in_channels, use_bias, ln_elementwise_affine, layer_norm_eps, codebook_size
        )

        self.gradient_checkpointing = False

    def _set_gradient_checkpointing(self, module, value: bool = False) -> None:
        """
        Toggle gradient checkpointing on `module` if it exposes a `gradient_checkpointing` flag.

        Previously a no-op, which meant the flag read in `forward` could never be switched on through this hook.
        NOTE(review): presumably invoked per sub-module by the base class' enable/disable helpers - confirm against
        `ModelMixin`.
        """
        if hasattr(module, "gradient_checkpointing"):
            module.gradient_checkpointing = value

    @staticmethod
    def _checkpointed_layer(layer, hidden_states, encoder_hidden_states, pooled_text_emb, cross_attention_kwargs):
        """Run one transformer layer under activation checkpointing with the same arguments as the regular path."""

        # Tensors are handed to `checkpoint` positionally and re-mapped to the layer's keyword arguments here;
        # the original wrapper only accepted `*args` and therefore failed when called with keywords.
        def run(hidden_states_, encoder_hidden_states_, pooled_text_emb_):
            return layer(
                hidden_states_,
                encoder_hidden_states=encoder_hidden_states_,
                cross_attention_kwargs=cross_attention_kwargs,
                added_cond_kwargs={"pooled_text_emb": pooled_text_emb_},
            )

        return checkpoint(run, hidden_states, encoder_hidden_states, pooled_text_emb)

    def forward(self, input_ids, encoder_hidden_states, pooled_text_emb, micro_conds, cross_attention_kwargs=None):
        """
        Compute per-position codebook logits for a grid of token ids.

        Args:
            input_ids: Integer token ids; `UVit2DConvEmbed` permutes the embedded result with `(0, 3, 1, 2)`, so a
                `(batch, height, width)` layout is expected.
            encoder_hidden_states: Text-encoder sequence with `encoder_hidden_size` features.
            pooled_text_emb: Pooled text embedding, concatenated with the micro-conditioning along dim 1.
            micro_conds: Scalar conditioning values; flattened, sinusoidally embedded and reshaped to one row per
                batch element.
            cross_attention_kwargs: Optional kwargs forwarded to every attention-bearing block.

        Returns:
            Output of `mlm_layer` applied to the channels-first feature map produced by the up block.
        """
        encoder_hidden_states = self.encoder_proj(encoder_hidden_states)
        encoder_hidden_states = self.encoder_proj_layer_norm(encoder_hidden_states)

        micro_cond_embeds = get_timestep_embedding(
            micro_conds.flatten(), self.config.micro_cond_encode_dim, flip_sin_to_cos=True, downscale_freq_shift=0
        )

        # One row of concatenated micro-conditioning embeddings per batch element.
        micro_cond_embeds = micro_cond_embeds.reshape((input_ids.shape[0], -1))

        pooled_text_emb = torch.cat([pooled_text_emb, micro_cond_embeds], dim=1)
        pooled_text_emb = pooled_text_emb.to(dtype=self.dtype)
        pooled_text_emb = self.cond_embed(pooled_text_emb).to(encoder_hidden_states.dtype)

        hidden_states = self.embed(input_ids)

        hidden_states = self.down_block(
            hidden_states,
            pooled_text_emb=pooled_text_emb,
            encoder_hidden_states=encoder_hidden_states,
            cross_attention_kwargs=cross_attention_kwargs,
        )

        # (batch, channels, height, width) -> (batch, height * width, channels) for the transformer stack.
        batch_size, channels, height, width = hidden_states.shape
        hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch_size, height * width, channels)

        hidden_states = self.project_to_hidden_norm(hidden_states)
        hidden_states = self.project_to_hidden(hidden_states)

        use_checkpointing = self.training and self.gradient_checkpointing
        for layer in self.transformer_layers:
            if use_checkpointing:
                hidden_states = self._checkpointed_layer(
                    layer, hidden_states, encoder_hidden_states, pooled_text_emb, cross_attention_kwargs
                )
            else:
                hidden_states = layer(
                    hidden_states,
                    encoder_hidden_states=encoder_hidden_states,
                    cross_attention_kwargs=cross_attention_kwargs,
                    added_cond_kwargs={"pooled_text_emb": pooled_text_emb},
                )

        hidden_states = self.project_from_hidden_norm(hidden_states)
        hidden_states = self.project_from_hidden(hidden_states)

        # Back to the channels-first grid expected by the convolutional up block.
        hidden_states = hidden_states.reshape(batch_size, height, width, channels).permute(0, 3, 1, 2)

        hidden_states = self.up_block(
            hidden_states,
            pooled_text_emb=pooled_text_emb,
            encoder_hidden_states=encoder_hidden_states,
            cross_attention_kwargs=cross_attention_kwargs,
        )

        logits = self.mlm_layer(hidden_states)

        return logits

    @property
    # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.attn_processors
    def attn_processors(self) -> Dict[str, AttentionProcessor]:
        r"""
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model with
            indexed by its weight name.
        """
        # set recursively
        processors = {}

        def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
            if hasattr(module, "get_processor"):
                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

            for sub_name, child in module.named_children():
                fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)

            return processors

        for name, module in self.named_children():
            fn_recursive_add_processors(name, module, processors)

        return processors

    # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attn_processor
    def set_attn_processor(
        self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]], _remove_lora=False
    ):
        r"""
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers.

                If `processor` is a dict, the key needs to define the path to the corresponding cross attention
                processor. This is strongly recommended when setting trainable attention processors.

        """
        count = len(self.attn_processors.keys())

        if isinstance(processor, dict) and len(processor) != count:
            raise ValueError(
                f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
                f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
            )

        def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
            if hasattr(module, "set_processor"):
                if not isinstance(processor, dict):
                    module.set_processor(processor, _remove_lora=_remove_lora)
                else:
                    module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora)

            for sub_name, child in module.named_children():
                fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)

        for name, module in self.named_children():
            fn_recursive_attn_processor(name, module, processor)

    # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_default_attn_processor
    def set_default_attn_processor(self):
        """
        Disables custom attention processors and sets the default attention implementation.
        """
        if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
            processor = AttnAddedKVProcessor()
        elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
            processor = AttnProcessor()
        else:
            raise ValueError(
                f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
            )

        self.set_attn_processor(processor, _remove_lora=True)
291
+
292
+
293
class UVit2DConvEmbed(nn.Module):
    """
    Input stem: token embedding lookup, RMS normalization over the embedding axis, then a 1x1 convolution that
    maps `in_channels` to `block_out_channels`.
    """

    def __init__(self, in_channels, block_out_channels, vocab_size, elementwise_affine, eps, bias):
        super().__init__()
        # Sub-modules are created in the same order as before so parameter registration is unchanged.
        self.embeddings = nn.Embedding(vocab_size, in_channels)
        self.layer_norm = RMSNorm(in_channels, eps, elementwise_affine)
        self.conv = nn.Conv2d(in_channels, block_out_channels, kernel_size=1, bias=bias)

    def forward(self, input_ids):
        """Embed `input_ids`, normalize, move the embedding axis to position 1 and apply the 1x1 convolution."""
        token_features = self.layer_norm(self.embeddings(input_ids))
        # The embedding axis is last after the lookup; `nn.Conv2d` wants it at dim 1.
        channels_first = token_features.permute(0, 3, 1, 2)
        return self.conv(channels_first)
306
+
307
+
308
class UVitBlock(nn.Module):
    """
    Convolutional stage made of `num_res_blocks` pairs of (`ConvNextBlock`, `SkipFFTransformerBlock`), optionally
    preceded by a `Downsample2D` and/or followed by an `Upsample2D`.
    """

    def __init__(
        self,
        channels,
        num_res_blocks: int,
        hidden_size,
        hidden_dropout,
        ln_elementwise_affine,
        layer_norm_eps,
        use_bias,
        block_num_heads,
        attention_dropout,
        downsample: bool,
        upsample: bool,
    ):
        super().__init__()

        # Optional resolution change at the entry of the block.
        self.downsample = (
            Downsample2D(
                channels,
                use_conv=True,
                padding=0,
                name="Conv2d_0",
                kernel_size=2,
                norm_type="rms_norm",
                eps=layer_norm_eps,
                elementwise_affine=ln_elementwise_affine,
                bias=use_bias,
            )
            if downsample
            else None
        )

        conv_stack = []
        for _ in range(num_res_blocks):
            conv_stack.append(
                ConvNextBlock(
                    channels,
                    layer_norm_eps,
                    ln_elementwise_affine,
                    use_bias,
                    hidden_dropout,
                    hidden_size,
                )
            )
        self.res_blocks = nn.ModuleList(conv_stack)

        attn_stack = []
        for _ in range(num_res_blocks):
            attn_stack.append(
                SkipFFTransformerBlock(
                    channels,
                    block_num_heads,
                    channels // block_num_heads,
                    hidden_size,
                    use_bias,
                    attention_dropout,
                    channels,
                    attention_bias=use_bias,
                    attention_out_bias=use_bias,
                )
            )
        self.attention_blocks = nn.ModuleList(attn_stack)

        # Optional resolution change at the exit of the block.
        self.upsample = (
            Upsample2D(
                channels,
                use_conv_transpose=True,
                kernel_size=2,
                padding=0,
                name="conv",
                norm_type="rms_norm",
                eps=layer_norm_eps,
                elementwise_affine=ln_elementwise_affine,
                bias=use_bias,
                interpolate=False,
            )
            if upsample
            else None
        )

    def forward(self, x, pooled_text_emb, encoder_hidden_states, cross_attention_kwargs):
        """
        Apply the optional downsample, then each (residual conv, attention) pair, then the optional upsample.

        `x` is unpacked as `(batch, channels, height, width)`; attention runs on the flattened
        `(batch, height * width, channels)` view and the result is reshaped back.
        """
        if self.downsample is not None:
            x = self.downsample(x)

        for conv_block, attn_block in zip(self.res_blocks, self.attention_blocks):
            x = conv_block(x, pooled_text_emb)

            batch_size, channels, height, width = x.shape
            # Flatten the spatial grid into a token sequence for attention.
            tokens = x.view(batch_size, channels, height * width).permute(0, 2, 1)
            tokens = attn_block(
                tokens, encoder_hidden_states=encoder_hidden_states, cross_attention_kwargs=cross_attention_kwargs
            )
            # Restore the channels-first grid.
            x = tokens.permute(0, 2, 1).view(batch_size, channels, height, width)

        if self.upsample is not None:
            x = self.upsample(x)

        return x
405
+
406
+
407
class ConvNextBlock(nn.Module):
    """
    Residual block: depthwise 3x3 convolution, RMS norm, a channel-wise MLP (linear -> GELU ->
    `GlobalResponseNorm` -> linear -> dropout), a skip connection, and finally a scale/shift modulation predicted
    from the conditioning embedding.
    """

    def __init__(
        self, channels, layer_norm_eps, ln_elementwise_affine, use_bias, hidden_dropout, hidden_size, res_ffn_factor=4
    ):
        super().__init__()
        # Inner width of the channel-wise MLP.
        ffn_channels = int(channels * res_ffn_factor)

        self.depthwise = nn.Conv2d(
            channels,
            channels,
            kernel_size=3,
            padding=1,
            groups=channels,
            bias=use_bias,
        )
        self.norm = RMSNorm(channels, layer_norm_eps, ln_elementwise_affine)
        self.channelwise_linear_1 = nn.Linear(channels, ffn_channels, bias=use_bias)
        self.channelwise_act = nn.GELU()
        self.channelwise_norm = GlobalResponseNorm(ffn_channels)
        self.channelwise_linear_2 = nn.Linear(ffn_channels, channels, bias=use_bias)
        self.channelwise_dropout = nn.Dropout(hidden_dropout)
        # Produces `2 * channels` values that are split into (scale, shift) in `forward`.
        self.cond_embeds_mapper = nn.Linear(hidden_size, channels * 2, use_bias)

    def forward(self, x, cond_embeds):
        """Return the conditioned residual update of the channels-first feature map `x`."""
        residual = x

        # The channel-wise layers operate with channels on the last axis.
        h = self.depthwise(x).permute(0, 2, 3, 1)
        h = self.norm(h)

        h = self.channelwise_linear_1(h)
        h = self.channelwise_act(h)
        h = self.channelwise_norm(h)
        h = self.channelwise_linear_2(h)
        h = self.channelwise_dropout(h)

        # Back to channels-first before adding the skip connection.
        h = h.permute(0, 3, 1, 2) + residual

        modulation = self.cond_embeds_mapper(F.silu(cond_embeds))
        scale, shift = modulation.chunk(2, dim=1)
        # Broadcast the per-channel scale and shift over the two spatial axes.
        return h * (1 + scale[:, :, None, None]) + shift[:, :, None, None]
450
+
451
+
452
class ConvMlmLayer(nn.Module):
    """
    Output head: 1x1 convolution to `in_channels`, RMS norm over the channel axis, then a 1x1 convolution to
    `codebook_size` logits per spatial position.
    """

    def __init__(
        self,
        block_out_channels: int,
        in_channels: int,
        use_bias: bool,
        ln_elementwise_affine: bool,
        layer_norm_eps: float,
        codebook_size: int,
    ):
        super().__init__()
        self.conv1 = nn.Conv2d(block_out_channels, in_channels, kernel_size=1, bias=use_bias)
        self.layer_norm = RMSNorm(in_channels, layer_norm_eps, ln_elementwise_affine)
        self.conv2 = nn.Conv2d(in_channels, codebook_size, kernel_size=1, bias=use_bias)

    def forward(self, hidden_states):
        """Map a channels-first feature map to per-position logits."""
        projected = self.conv1(hidden_states)
        # The norm is applied with channels last, then the layout is restored for the final convolution.
        channels_last = projected.permute(0, 2, 3, 1)
        normalized = self.layer_norm(channels_last).permute(0, 3, 1, 2)
        return self.conv2(normalized)
@@ -214,6 +214,7 @@ class FlaxAttentionBlock(nn.Module):
214
214
  Parameters `dtype`
215
215
 
216
216
  """
217
+
217
218
  channels: int
218
219
  num_head_channels: int = None
219
220
  num_groups: int = 32
@@ -291,6 +292,7 @@ class FlaxDownEncoderBlock2D(nn.Module):
291
292
  dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
292
293
  Parameters `dtype`
293
294
  """
295
+
294
296
  in_channels: int
295
297
  out_channels: int
296
298
  dropout: float = 0.0
@@ -347,6 +349,7 @@ class FlaxUpDecoderBlock2D(nn.Module):
347
349
  dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
348
350
  Parameters `dtype`
349
351
  """
352
+
350
353
  in_channels: int
351
354
  out_channels: int
352
355
  dropout: float = 0.0
@@ -401,6 +404,7 @@ class FlaxUNetMidBlock2D(nn.Module):
401
404
  dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
402
405
  Parameters `dtype`
403
406
  """
407
+
404
408
  in_channels: int
405
409
  dropout: float = 0.0
406
410
  num_layers: int = 1
@@ -488,6 +492,7 @@ class FlaxEncoder(nn.Module):
488
492
  dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
489
493
  Parameters `dtype`
490
494
  """
495
+
491
496
  in_channels: int = 3
492
497
  out_channels: int = 3
493
498
  down_block_types: Tuple[str] = ("DownEncoderBlock2D",)
@@ -600,6 +605,7 @@ class FlaxDecoder(nn.Module):
600
605
  dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
601
606
  parameters `dtype`
602
607
  """
608
+
603
609
  in_channels: int = 3
604
610
  out_channels: int = 3
605
611
  up_block_types: Tuple[str] = ("UpDecoderBlock2D",)
@@ -767,6 +773,7 @@ class FlaxAutoencoderKL(nn.Module, FlaxModelMixin, ConfigMixin):
767
773
  dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
768
774
  The `dtype` of the parameters.
769
775
  """
776
+
770
777
  in_channels: int = 3
771
778
  out_channels: int = 3
772
779
  down_block_types: Tuple[str] = ("DownEncoderBlock2D",)
@@ -20,8 +20,8 @@ import torch.nn as nn
20
20
  from ..configuration_utils import ConfigMixin, register_to_config
21
21
  from ..utils import BaseOutput
22
22
  from ..utils.accelerate_utils import apply_forward_hook
23
+ from .autoencoders.vae import Decoder, DecoderOutput, Encoder, VectorQuantizer
23
24
  from .modeling_utils import ModelMixin
24
- from .vae import Decoder, DecoderOutput, Encoder, VectorQuantizer
25
25
 
26
26
 
27
27
  @dataclass
@@ -88,6 +88,9 @@ class VQModel(ModelMixin, ConfigMixin):
88
88
  vq_embed_dim: Optional[int] = None,
89
89
  scaling_factor: float = 0.18215,
90
90
  norm_type: str = "group", # group, spatial
91
+ mid_block_add_attention=True,
92
+ lookup_from_codebook=False,
93
+ force_upcast=False,
91
94
  ):
92
95
  super().__init__()
93
96
 
@@ -101,6 +104,7 @@ class VQModel(ModelMixin, ConfigMixin):
101
104
  act_fn=act_fn,
102
105
  norm_num_groups=norm_num_groups,
103
106
  double_z=False,
107
+ mid_block_add_attention=mid_block_add_attention,
104
108
  )
105
109
 
106
110
  vq_embed_dim = vq_embed_dim if vq_embed_dim is not None else latent_channels
@@ -119,6 +123,7 @@ class VQModel(ModelMixin, ConfigMixin):
119
123
  act_fn=act_fn,
120
124
  norm_num_groups=norm_num_groups,
121
125
  norm_type=norm_type,
126
+ mid_block_add_attention=mid_block_add_attention,
122
127
  )
123
128
 
124
129
  @apply_forward_hook
@@ -133,11 +138,13 @@ class VQModel(ModelMixin, ConfigMixin):
133
138
 
134
139
  @apply_forward_hook
135
140
  def decode(
136
- self, h: torch.FloatTensor, force_not_quantize: bool = False, return_dict: bool = True
141
+ self, h: torch.FloatTensor, force_not_quantize: bool = False, return_dict: bool = True, shape=None
137
142
  ) -> Union[DecoderOutput, torch.FloatTensor]:
138
143
  # also go through quantization layer
139
144
  if not force_not_quantize:
140
145
  quant, _, _ = self.quantize(h)
146
+ elif self.config.lookup_from_codebook:
147
+ quant = self.quantize.get_codebook_entry(h, shape)
141
148
  else:
142
149
  quant = h
143
150
  quant2 = self.post_quant_conv(quant)
@@ -148,7 +155,9 @@ class VQModel(ModelMixin, ConfigMixin):
148
155
 
149
156
  return DecoderOutput(sample=dec)
150
157
 
151
- def forward(self, sample: torch.FloatTensor, return_dict: bool = True) -> Union[DecoderOutput, torch.FloatTensor]:
158
+ def forward(
159
+ self, sample: torch.FloatTensor, return_dict: bool = True
160
+ ) -> Union[DecoderOutput, Tuple[torch.FloatTensor, ...]]:
152
161
  r"""
153
162
  The [`VQModel`] forward method.
154
163
 
diffusers/optimization.py CHANGED
@@ -37,7 +37,7 @@ class SchedulerType(Enum):
37
37
  PIECEWISE_CONSTANT = "piecewise_constant"
38
38
 
39
39
 
40
- def get_constant_schedule(optimizer: Optimizer, last_epoch: int = -1):
40
+ def get_constant_schedule(optimizer: Optimizer, last_epoch: int = -1) -> LambdaLR:
41
41
  """
42
42
  Create a schedule with a constant learning rate, using the learning rate set in optimizer.
43
43
 
@@ -53,7 +53,7 @@ def get_constant_schedule(optimizer: Optimizer, last_epoch: int = -1):
53
53
  return LambdaLR(optimizer, lambda _: 1, last_epoch=last_epoch)
54
54
 
55
55
 
56
- def get_constant_schedule_with_warmup(optimizer: Optimizer, num_warmup_steps: int, last_epoch: int = -1):
56
+ def get_constant_schedule_with_warmup(optimizer: Optimizer, num_warmup_steps: int, last_epoch: int = -1) -> LambdaLR:
57
57
  """
58
58
  Create a schedule with a constant learning rate preceded by a warmup period during which the learning rate
59
59
  increases linearly between 0 and the initial lr set in the optimizer.
@@ -78,7 +78,7 @@ def get_constant_schedule_with_warmup(optimizer: Optimizer, num_warmup_steps: in
78
78
  return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)
79
79
 
80
80
 
81
- def get_piecewise_constant_schedule(optimizer: Optimizer, step_rules: str, last_epoch: int = -1):
81
+ def get_piecewise_constant_schedule(optimizer: Optimizer, step_rules: str, last_epoch: int = -1) -> LambdaLR:
82
82
  """
83
83
  Create a schedule with a constant learning rate, using the learning rate set in optimizer.
84
84
 
@@ -120,7 +120,9 @@ def get_piecewise_constant_schedule(optimizer: Optimizer, step_rules: str, last_
120
120
  return LambdaLR(optimizer, rules_func, last_epoch=last_epoch)
121
121
 
122
122
 
123
- def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):
123
+ def get_linear_schedule_with_warmup(
124
+ optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, last_epoch: int = -1
125
+ ) -> LambdaLR:
124
126
  """
125
127
  Create a schedule with a learning rate that decreases linearly from the initial lr set in the optimizer to 0, after
126
128
  a warmup period during which it increases linearly from 0 to the initial lr set in the optimizer.
@@ -151,7 +153,7 @@ def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_st
151
153
 
152
154
  def get_cosine_schedule_with_warmup(
153
155
  optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1
154
- ):
156
+ ) -> LambdaLR:
155
157
  """
156
158
  Create a schedule with a learning rate that decreases following the values of the cosine function between the
157
159
  initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the
@@ -185,7 +187,7 @@ def get_cosine_schedule_with_warmup(
185
187
 
186
188
  def get_cosine_with_hard_restarts_schedule_with_warmup(
187
189
  optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: int = 1, last_epoch: int = -1
188
- ):
190
+ ) -> LambdaLR:
189
191
  """
190
192
  Create a schedule with a learning rate that decreases following the values of the cosine function between the
191
193
  initial lr set in the optimizer to 0, with several hard restarts, after a warmup period during which it increases
@@ -219,8 +221,13 @@ def get_cosine_with_hard_restarts_schedule_with_warmup(
219
221
 
220
222
 
221
223
  def get_polynomial_decay_schedule_with_warmup(
222
- optimizer, num_warmup_steps, num_training_steps, lr_end=1e-7, power=1.0, last_epoch=-1
223
- ):
224
+ optimizer: Optimizer,
225
+ num_warmup_steps: int,
226
+ num_training_steps: int,
227
+ lr_end: float = 1e-7,
228
+ power: float = 1.0,
229
+ last_epoch: int = -1,
230
+ ) -> LambdaLR:
224
231
  """
225
232
  Create a schedule with a learning rate that decreases as a polynomial decay from the initial lr set in the
226
233
  optimizer to end lr defined by *lr_end*, after a warmup period during which it increases linearly from 0 to the
@@ -288,7 +295,7 @@ def get_scheduler(
288
295
  num_cycles: int = 1,
289
296
  power: float = 1.0,
290
297
  last_epoch: int = -1,
291
- ):
298
+ ) -> LambdaLR:
292
299
  """
293
300
  Unified API to get any scheduler from its name.
294
301