diffusers 0.30.3__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in the public registry.
Files changed (268)
  1. diffusers/__init__.py +97 -4
  2. diffusers/callbacks.py +56 -3
  3. diffusers/configuration_utils.py +13 -1
  4. diffusers/image_processor.py +282 -71
  5. diffusers/loaders/__init__.py +24 -3
  6. diffusers/loaders/ip_adapter.py +543 -16
  7. diffusers/loaders/lora_base.py +138 -125
  8. diffusers/loaders/lora_conversion_utils.py +647 -0
  9. diffusers/loaders/lora_pipeline.py +2216 -230
  10. diffusers/loaders/peft.py +380 -0
  11. diffusers/loaders/single_file_model.py +71 -4
  12. diffusers/loaders/single_file_utils.py +597 -10
  13. diffusers/loaders/textual_inversion.py +5 -3
  14. diffusers/loaders/transformer_flux.py +181 -0
  15. diffusers/loaders/transformer_sd3.py +89 -0
  16. diffusers/loaders/unet.py +56 -12
  17. diffusers/models/__init__.py +49 -12
  18. diffusers/models/activations.py +22 -9
  19. diffusers/models/adapter.py +53 -53
  20. diffusers/models/attention.py +98 -13
  21. diffusers/models/attention_flax.py +1 -1
  22. diffusers/models/attention_processor.py +2160 -346
  23. diffusers/models/autoencoders/__init__.py +5 -0
  24. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  25. diffusers/models/autoencoders/autoencoder_kl.py +73 -12
  26. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  27. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +213 -105
  28. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  29. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  30. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  31. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
  32. diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
  33. diffusers/models/autoencoders/vae.py +18 -5
  34. diffusers/models/controlnet.py +47 -802
  35. diffusers/models/controlnet_flux.py +70 -0
  36. diffusers/models/controlnet_sd3.py +26 -376
  37. diffusers/models/controlnet_sparsectrl.py +46 -719
  38. diffusers/models/controlnets/__init__.py +23 -0
  39. diffusers/models/controlnets/controlnet.py +872 -0
  40. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
  41. diffusers/models/controlnets/controlnet_flux.py +536 -0
  42. diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
  43. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  44. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  45. diffusers/models/controlnets/controlnet_union.py +832 -0
  46. diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
  47. diffusers/models/controlnets/multicontrolnet.py +183 -0
  48. diffusers/models/embeddings.py +996 -92
  49. diffusers/models/embeddings_flax.py +23 -9
  50. diffusers/models/model_loading_utils.py +264 -14
  51. diffusers/models/modeling_flax_utils.py +1 -1
  52. diffusers/models/modeling_utils.py +334 -51
  53. diffusers/models/normalization.py +157 -13
  54. diffusers/models/transformers/__init__.py +6 -0
  55. diffusers/models/transformers/auraflow_transformer_2d.py +3 -2
  56. diffusers/models/transformers/cogvideox_transformer_3d.py +69 -13
  57. diffusers/models/transformers/dit_transformer_2d.py +1 -1
  58. diffusers/models/transformers/latte_transformer_3d.py +4 -4
  59. diffusers/models/transformers/pixart_transformer_2d.py +10 -2
  60. diffusers/models/transformers/sana_transformer.py +488 -0
  61. diffusers/models/transformers/stable_audio_transformer.py +1 -1
  62. diffusers/models/transformers/transformer_2d.py +1 -1
  63. diffusers/models/transformers/transformer_allegro.py +422 -0
  64. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  65. diffusers/models/transformers/transformer_flux.py +189 -51
  66. diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
  67. diffusers/models/transformers/transformer_ltx.py +469 -0
  68. diffusers/models/transformers/transformer_mochi.py +499 -0
  69. diffusers/models/transformers/transformer_sd3.py +112 -18
  70. diffusers/models/transformers/transformer_temporal.py +1 -1
  71. diffusers/models/unets/unet_1d_blocks.py +1 -1
  72. diffusers/models/unets/unet_2d.py +8 -1
  73. diffusers/models/unets/unet_2d_blocks.py +88 -21
  74. diffusers/models/unets/unet_2d_condition.py +9 -9
  75. diffusers/models/unets/unet_3d_blocks.py +9 -7
  76. diffusers/models/unets/unet_motion_model.py +46 -68
  77. diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
  78. diffusers/models/unets/unet_stable_cascade.py +2 -2
  79. diffusers/models/unets/uvit_2d.py +1 -1
  80. diffusers/models/upsampling.py +14 -6
  81. diffusers/pipelines/__init__.py +69 -6
  82. diffusers/pipelines/allegro/__init__.py +48 -0
  83. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  84. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  85. diffusers/pipelines/animatediff/__init__.py +2 -0
  86. diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
  87. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +52 -22
  88. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
  89. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +3 -1
  90. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -72
  91. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  92. diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
  93. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +2 -9
  94. diffusers/pipelines/auto_pipeline.py +88 -10
  95. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  96. diffusers/pipelines/cogvideo/__init__.py +2 -0
  97. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +80 -39
  98. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
  99. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +108 -50
  100. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +89 -50
  101. diffusers/pipelines/cogview3/__init__.py +47 -0
  102. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  103. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  104. diffusers/pipelines/controlnet/__init__.py +86 -80
  105. diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
  106. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -3
  107. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +9 -2
  108. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +9 -2
  109. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +37 -15
  110. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +12 -4
  111. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +9 -4
  112. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  113. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  114. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  115. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +22 -4
  116. diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
  117. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +56 -20
  118. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  119. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  120. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  121. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
  122. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
  123. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +32 -9
  124. diffusers/pipelines/flux/__init__.py +23 -1
  125. diffusers/pipelines/flux/modeling_flux.py +47 -0
  126. diffusers/pipelines/flux/pipeline_flux.py +256 -48
  127. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  128. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  129. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  130. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
  131. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
  132. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
  133. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  134. diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
  135. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
  136. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  137. diffusers/pipelines/flux/pipeline_output.py +16 -0
  138. diffusers/pipelines/free_noise_utils.py +365 -5
  139. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  140. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  141. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  142. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +20 -4
  143. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
  144. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
  145. diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
  146. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
  147. diffusers/pipelines/kolors/text_encoder.py +2 -2
  148. diffusers/pipelines/kolors/tokenizer.py +4 -0
  149. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
  150. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
  151. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  152. diffusers/pipelines/latte/pipeline_latte.py +2 -2
  153. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
  154. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
  155. diffusers/pipelines/ltx/__init__.py +50 -0
  156. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  157. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  158. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  159. diffusers/pipelines/lumina/pipeline_lumina.py +3 -10
  160. diffusers/pipelines/mochi/__init__.py +48 -0
  161. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  162. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  163. diffusers/pipelines/pag/__init__.py +13 -0
  164. diffusers/pipelines/pag/pag_utils.py +8 -2
  165. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +2 -3
  166. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
  167. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +3 -5
  168. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
  169. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +22 -6
  170. diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
  171. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +7 -14
  172. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  173. diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
  174. diffusers/pipelines/pag/pipeline_pag_sd_3.py +18 -9
  175. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  176. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
  177. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
  178. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  179. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
  180. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
  181. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
  182. diffusers/pipelines/pia/pipeline_pia.py +2 -0
  183. diffusers/pipelines/pipeline_flax_utils.py +1 -1
  184. diffusers/pipelines/pipeline_loading_utils.py +250 -31
  185. diffusers/pipelines/pipeline_utils.py +158 -186
  186. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +7 -14
  187. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +7 -14
  188. diffusers/pipelines/sana/__init__.py +47 -0
  189. diffusers/pipelines/sana/pipeline_output.py +21 -0
  190. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  191. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
  192. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
  193. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
  194. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +46 -9
  195. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
  196. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
  197. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
  198. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +228 -23
  199. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +82 -13
  200. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +60 -11
  201. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
  202. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  203. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
  204. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
  205. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -12
  206. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -22
  207. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -22
  208. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
  209. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
  210. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
  211. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
  212. diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
  213. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  214. diffusers/quantizers/__init__.py +16 -0
  215. diffusers/quantizers/auto.py +139 -0
  216. diffusers/quantizers/base.py +233 -0
  217. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  218. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
  219. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  220. diffusers/quantizers/gguf/__init__.py +1 -0
  221. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  222. diffusers/quantizers/gguf/utils.py +456 -0
  223. diffusers/quantizers/quantization_config.py +669 -0
  224. diffusers/quantizers/torchao/__init__.py +15 -0
  225. diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
  226. diffusers/schedulers/scheduling_ddim.py +4 -1
  227. diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
  228. diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
  229. diffusers/schedulers/scheduling_ddpm.py +6 -7
  230. diffusers/schedulers/scheduling_ddpm_parallel.py +6 -7
  231. diffusers/schedulers/scheduling_deis_multistep.py +102 -6
  232. diffusers/schedulers/scheduling_dpmsolver_multistep.py +113 -6
  233. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +111 -5
  234. diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
  235. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +126 -7
  236. diffusers/schedulers/scheduling_edm_euler.py +8 -6
  237. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
  238. diffusers/schedulers/scheduling_euler_discrete.py +92 -7
  239. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
  240. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
  241. diffusers/schedulers/scheduling_heun_discrete.py +114 -8
  242. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
  243. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
  244. diffusers/schedulers/scheduling_lcm.py +2 -6
  245. diffusers/schedulers/scheduling_lms_discrete.py +76 -1
  246. diffusers/schedulers/scheduling_repaint.py +1 -1
  247. diffusers/schedulers/scheduling_sasolver.py +102 -6
  248. diffusers/schedulers/scheduling_tcd.py +2 -6
  249. diffusers/schedulers/scheduling_unclip.py +4 -1
  250. diffusers/schedulers/scheduling_unipc_multistep.py +127 -5
  251. diffusers/training_utils.py +63 -19
  252. diffusers/utils/__init__.py +7 -1
  253. diffusers/utils/constants.py +1 -0
  254. diffusers/utils/dummy_pt_objects.py +240 -0
  255. diffusers/utils/dummy_torch_and_transformers_objects.py +435 -0
  256. diffusers/utils/dynamic_modules_utils.py +3 -3
  257. diffusers/utils/hub_utils.py +44 -40
  258. diffusers/utils/import_utils.py +98 -8
  259. diffusers/utils/loading_utils.py +28 -4
  260. diffusers/utils/peft_utils.py +6 -3
  261. diffusers/utils/testing_utils.py +115 -1
  262. diffusers/utils/torch_utils.py +3 -0
  263. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/METADATA +73 -72
  264. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/RECORD +268 -193
  265. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
  266. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
  267. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
  268. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
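Entries 214-225 introduce a new diffusers/quantizers package with bitsandbytes, GGUF, and torchao backends. For orientation, 4-bit loading with the bitsandbytes backend typically looks like the sketch below; the checkpoint id and parameter choices are illustrative assumptions, and bitsandbytes must be installed:

import torch
from diffusers import BitsAndBytesConfig, FluxTransformer2DModel

# illustrative 4-bit NF4 configuration; assumes the bitsandbytes package is available
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# hypothetical checkpoint; other models with a supported transformer work similarly
transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)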
diffusers/pipelines/hunyuan_video/pipeline_output.py
@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+
+import torch
+
+from diffusers.utils import BaseOutput
+
+
+@dataclass
+class HunyuanVideoPipelineOutput(BaseOutput):
+    r"""
+    Output class for HunyuanVideo pipelines.
+
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size`, with each sub-list containing
+            denoised PIL image sequences of length `num_frames`. It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`.
+    """
+
+    frames: torch.Tensor
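Because BaseOutput subclasses behave as both dataclasses and read-only dicts, the new output class can be consumed either way. A minimal sketch (the class is re-declared locally for self-containment; the tensor shape is arbitrary):

from dataclasses import dataclass

import torch

from diffusers.utils import BaseOutput


@dataclass
class HunyuanVideoPipelineOutput(BaseOutput):
    frames: torch.Tensor


out = HunyuanVideoPipelineOutput(frames=torch.randn(1, 61, 3, 320, 512))
print(out.frames.shape)          # attribute access
print(out["frames"].shape)       # dict-style access, same tensor
print(out.to_tuple()[0].shape)   # tuple conversion, as BaseOutput supports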
diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py
@@ -125,9 +125,21 @@ def get_resize_crop_region_for_grid(src, tgt_size):
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -786,7 +798,11 @@ class HunyuanDiTPipeline(DiffusionPipeline):
         base_size = 512 // 8 // self.transformer.config.patch_size
         grid_crops_coords = get_resize_crop_region_for_grid((grid_height, grid_width), base_size)
         image_rotary_emb = get_2d_rotary_pos_embed(
-            self.transformer.inner_dim // self.transformer.num_heads, grid_crops_coords, (grid_height, grid_width)
+            self.transformer.inner_dim // self.transformer.num_heads,
+            grid_crops_coords,
+            (grid_height, grid_width),
+            device=device,
+            output_type="pt",
         )
 
         style = torch.tensor([0], device=device)
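For reference, the function body that follows the expanded docstring is unchanged by this release; as in the stable-diffusion pipeline it is copied from, it implements the paper's rescaling:

def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
    std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
    std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
    # rescale the result from guidance (fixes overexposure)
    noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
    # mix with the original result by guidance_rescale to avoid "plain looking" images
    noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
    return noise_cfg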
diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py
@@ -193,15 +193,15 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
         Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
         GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
         Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
         """
-        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
-        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
 
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
@@ -411,7 +411,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -419,8 +419,8 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
-        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
 
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
@@ -652,7 +652,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -660,8 +660,8 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
-        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
 
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
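The new signature lets callers pick the accelerator explicitly instead of assuming a CUDA device index. A hedged usage sketch (the checkpoint id is illustrative; AutoPipelineForText2Image resolves to the combined pipeline):

import torch
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
)
# offloads both the prior and decoder sub-pipelines to the requested device
pipe.enable_sequential_cpu_offload(device="cuda")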
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py
@@ -547,7 +547,7 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
         negative_image_embeds = prior_outputs[1]
 
         prompt = [prompt] if not isinstance(prompt, (list, tuple)) else prompt
-        image = [image] if isinstance(prompt, PIL.Image.Image) else image
+        image = [image] if isinstance(image, PIL.Image.Image) else image
 
         if len(prompt) < image_embeds.shape[0] and image_embeds.shape[0] % len(prompt) == 0:
             prompt = (image_embeds.shape[0] // len(prompt)) * prompt
@@ -813,7 +813,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
         negative_image_embeds = prior_outputs[1]
 
         prompt = [prompt] if not isinstance(prompt, (list, tuple)) else prompt
-        image = [image] if isinstance(prompt, PIL.Image.Image) else image
+        image = [image] if isinstance(image, PIL.Image.Image) else image
         mask_image = [mask_image] if isinstance(mask_image, PIL.Image.Image) else mask_image
 
         if len(prompt) < image_embeds.shape[0] and image_embeds.shape[0] % len(prompt) == 0:
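The one-word fix above matters because the old guard tested prompt instead of image, so a bare PIL image passed as image was never wrapped into a list. A self-contained illustration:

from PIL import Image

prompt = "a portrait"
image = Image.new("RGB", (512, 512))

# old (buggy) guard: checks the wrong variable, leaves `image` unwrapped
old = [image] if isinstance(prompt, Image.Image) else image
# fixed guard: wraps the PIL image so downstream batching can iterate it
new = [image] if isinstance(image, Image.Image) else image

assert old is image                        # bug: still a bare PIL image
assert len(new) == 1 and new[0] is image   # fix: a one-element list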
diffusers/pipelines/kolors/pipeline_kolors.py
@@ -70,7 +70,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
diffusers/pipelines/kolors/pipeline_kolors_img2img.py
@@ -89,7 +89,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -564,14 +564,16 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -579,7 +581,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -590,11 +592,12 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline.prepare_latents
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
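To make the reworked strength branch concrete, a quick numeric trace of the slicing with arbitrary values:

# denoising_start is None, 1st-order scheduler, 50 steps, strength 0.3
num_inference_steps = 50
strength = 0.3
order = 1

init_timestep = min(int(num_inference_steps * strength), num_inference_steps)  # 15
t_start = max(num_inference_steps - init_timestep, 0)                          # 35

# the pipeline keeps scheduler.timesteps[35:] (the last 15 steps) and, when the
# scheduler supports it, calls set_begin_index(35) so denoising resumes there
print(t_start * order, num_inference_steps - t_start)  # 35 15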
diffusers/pipelines/kolors/text_encoder.py
@@ -590,7 +590,7 @@ class GLMTransformer(torch.nn.Module):
         if not kv_caches:
             kv_caches = [None for _ in range(self.num_layers)]
         presents = () if use_cache else None
-        if self.gradient_checkpointing and self.training:
+        if torch.is_grad_enabled() and self.gradient_checkpointing:
             if use_cache:
                 logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
@@ -604,7 +604,7 @@ class GLMTransformer(torch.nn.Module):
                 all_hidden_states = all_hidden_states + (hidden_states,)
 
             layer = self._get_layer(index)
-            if self.gradient_checkpointing and self.training:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
                 layer_ret = torch.utils.checkpoint.checkpoint(
                     layer, hidden_states, attention_mask, rotary_pos_emb, kv_caches[index], use_cache
                 )
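Both hunks replace a training-mode check with an autograd check, so checkpointing is skipped whenever gradients are off (e.g. inference under torch.no_grad() with a module still in train mode); the same guard change recurs in LDMBertEncoder further down. A small demonstration:

import torch
import torch.nn as nn

m = nn.Linear(4, 4)
m.gradient_checkpointing = True  # ad-hoc flag mirroring the modules above
m.train()

with torch.no_grad():  # e.g. an eval loop that never called m.eval()
    old_guard = m.gradient_checkpointing and m.training               # True: would checkpoint
    new_guard = torch.is_grad_enabled() and m.gradient_checkpointing  # False: skips it

print(old_guard, new_guard)  # True False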
diffusers/pipelines/kolors/tokenizer.py
@@ -277,6 +277,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
         return_attention_mask: Optional[bool] = None,
+        padding_side: Optional[bool] = None,
     ) -> dict:
         """
         Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
@@ -298,6 +299,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
             pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                 This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
                 `>= 7.5` (Volta).
+            padding_side (`str`, *optional*):
+                The side on which the model should have padding applied. Should be selected between ['right', 'left'].
+                Default value is picked from the class attribute of the same name.
             return_attention_mask:
                 (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
@@ -66,7 +66,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
@@ -70,7 +70,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py
@@ -675,7 +675,7 @@ class LDMBertEncoder(LDMBertPreTrainedModel):
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
-            if self.gradient_checkpointing and self.training:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
diffusers/pipelines/latte/pipeline_latte.py
@@ -56,7 +56,7 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers.utils import export_to_gif
 
         >>> # You can replace the checkpoint id with "maxin-cn/Latte-1" too.
-        >>> pipe = LattePipeline.from_pretrained("maxin-cn/Latte-1", torch_dtype=torch.float16).to("cuda")
+        >>> pipe = LattePipeline.from_pretrained("maxin-cn/Latte-1", torch_dtype=torch.float16)
         >>> # Enable memory optimizations.
         >>> pipe.enable_model_cpu_offload()
 
@@ -76,7 +76,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py
@@ -234,9 +234,21 @@ class LEDITSCrossAttnProcessor:
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py
@@ -1643,9 +1643,21 @@ class LEditsPPPipelineStableDiffusionXL(
 
 # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
diffusers/pipelines/ltx/__init__.py
@@ -0,0 +1,50 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_ltx"] = ["LTXPipeline"]
+    _import_structure["pipeline_ltx_image2video"] = ["LTXImageToVideoPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_ltx import LTXPipeline
+        from .pipeline_ltx_image2video import LTXImageToVideoPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
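With the lazy-import scaffolding in place, both pipelines become importable from the package root. A hedged text-to-video sketch; the checkpoint id and call parameters are assumptions:

import torch
from diffusers import LTXPipeline
from diffusers.utils import export_to_video

# hypothetical checkpoint id for illustration
pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
pipe.to("cuda")

video = pipe(
    prompt="a sailboat gliding across a calm lake at dawn",
    num_frames=161,
    num_inference_steps=50,
).frames[0]
export_to_video(video, "sailboat.mp4", fps=24)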