diffusers 0.27.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (445) hide show
  1. diffusers/__init__.py +233 -6
  2. diffusers/callbacks.py +209 -0
  3. diffusers/commands/env.py +102 -6
  4. diffusers/configuration_utils.py +45 -16
  5. diffusers/dependency_versions_table.py +4 -3
  6. diffusers/image_processor.py +434 -110
  7. diffusers/loaders/__init__.py +42 -9
  8. diffusers/loaders/ip_adapter.py +626 -36
  9. diffusers/loaders/lora_base.py +900 -0
  10. diffusers/loaders/lora_conversion_utils.py +991 -125
  11. diffusers/loaders/lora_pipeline.py +3812 -0
  12. diffusers/loaders/peft.py +571 -7
  13. diffusers/loaders/single_file.py +405 -173
  14. diffusers/loaders/single_file_model.py +385 -0
  15. diffusers/loaders/single_file_utils.py +1783 -713
  16. diffusers/loaders/textual_inversion.py +41 -23
  17. diffusers/loaders/transformer_flux.py +181 -0
  18. diffusers/loaders/transformer_sd3.py +89 -0
  19. diffusers/loaders/unet.py +464 -540
  20. diffusers/loaders/unet_loader_utils.py +163 -0
  21. diffusers/models/__init__.py +76 -7
  22. diffusers/models/activations.py +65 -10
  23. diffusers/models/adapter.py +53 -53
  24. diffusers/models/attention.py +605 -18
  25. diffusers/models/attention_flax.py +1 -1
  26. diffusers/models/attention_processor.py +4304 -687
  27. diffusers/models/autoencoders/__init__.py +8 -0
  28. diffusers/models/autoencoders/autoencoder_asym_kl.py +15 -17
  29. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  30. diffusers/models/autoencoders/autoencoder_kl.py +110 -28
  31. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  32. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1482 -0
  33. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  34. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  35. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  36. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +19 -24
  37. diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
  38. diffusers/models/autoencoders/autoencoder_tiny.py +21 -18
  39. diffusers/models/autoencoders/consistency_decoder_vae.py +45 -20
  40. diffusers/models/autoencoders/vae.py +41 -29
  41. diffusers/models/autoencoders/vq_model.py +182 -0
  42. diffusers/models/controlnet.py +47 -800
  43. diffusers/models/controlnet_flux.py +70 -0
  44. diffusers/models/controlnet_sd3.py +68 -0
  45. diffusers/models/controlnet_sparsectrl.py +116 -0
  46. diffusers/models/controlnets/__init__.py +23 -0
  47. diffusers/models/controlnets/controlnet.py +872 -0
  48. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +9 -9
  49. diffusers/models/controlnets/controlnet_flux.py +536 -0
  50. diffusers/models/controlnets/controlnet_hunyuan.py +401 -0
  51. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  52. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  53. diffusers/models/controlnets/controlnet_union.py +832 -0
  54. diffusers/models/controlnets/controlnet_xs.py +1946 -0
  55. diffusers/models/controlnets/multicontrolnet.py +183 -0
  56. diffusers/models/downsampling.py +85 -18
  57. diffusers/models/embeddings.py +1856 -158
  58. diffusers/models/embeddings_flax.py +23 -9
  59. diffusers/models/model_loading_utils.py +480 -0
  60. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  61. diffusers/models/modeling_flax_utils.py +2 -7
  62. diffusers/models/modeling_outputs.py +14 -0
  63. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  64. diffusers/models/modeling_utils.py +611 -146
  65. diffusers/models/normalization.py +361 -20
  66. diffusers/models/resnet.py +18 -23
  67. diffusers/models/transformers/__init__.py +16 -0
  68. diffusers/models/transformers/auraflow_transformer_2d.py +544 -0
  69. diffusers/models/transformers/cogvideox_transformer_3d.py +542 -0
  70. diffusers/models/transformers/dit_transformer_2d.py +240 -0
  71. diffusers/models/transformers/dual_transformer_2d.py +9 -8
  72. diffusers/models/transformers/hunyuan_transformer_2d.py +578 -0
  73. diffusers/models/transformers/latte_transformer_3d.py +327 -0
  74. diffusers/models/transformers/lumina_nextdit2d.py +340 -0
  75. diffusers/models/transformers/pixart_transformer_2d.py +445 -0
  76. diffusers/models/transformers/prior_transformer.py +13 -13
  77. diffusers/models/transformers/sana_transformer.py +488 -0
  78. diffusers/models/transformers/stable_audio_transformer.py +458 -0
  79. diffusers/models/transformers/t5_film_transformer.py +17 -19
  80. diffusers/models/transformers/transformer_2d.py +297 -187
  81. diffusers/models/transformers/transformer_allegro.py +422 -0
  82. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  83. diffusers/models/transformers/transformer_flux.py +593 -0
  84. diffusers/models/transformers/transformer_hunyuan_video.py +791 -0
  85. diffusers/models/transformers/transformer_ltx.py +469 -0
  86. diffusers/models/transformers/transformer_mochi.py +499 -0
  87. diffusers/models/transformers/transformer_sd3.py +461 -0
  88. diffusers/models/transformers/transformer_temporal.py +21 -19
  89. diffusers/models/unets/unet_1d.py +8 -8
  90. diffusers/models/unets/unet_1d_blocks.py +31 -31
  91. diffusers/models/unets/unet_2d.py +17 -10
  92. diffusers/models/unets/unet_2d_blocks.py +225 -149
  93. diffusers/models/unets/unet_2d_condition.py +41 -40
  94. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  95. diffusers/models/unets/unet_3d_blocks.py +192 -1057
  96. diffusers/models/unets/unet_3d_condition.py +22 -27
  97. diffusers/models/unets/unet_i2vgen_xl.py +22 -18
  98. diffusers/models/unets/unet_kandinsky3.py +2 -2
  99. diffusers/models/unets/unet_motion_model.py +1413 -89
  100. diffusers/models/unets/unet_spatio_temporal_condition.py +40 -16
  101. diffusers/models/unets/unet_stable_cascade.py +19 -18
  102. diffusers/models/unets/uvit_2d.py +2 -2
  103. diffusers/models/upsampling.py +95 -26
  104. diffusers/models/vq_model.py +12 -164
  105. diffusers/optimization.py +1 -1
  106. diffusers/pipelines/__init__.py +202 -3
  107. diffusers/pipelines/allegro/__init__.py +48 -0
  108. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  109. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  110. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  111. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  112. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  113. diffusers/pipelines/animatediff/__init__.py +8 -0
  114. diffusers/pipelines/animatediff/pipeline_animatediff.py +122 -109
  115. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1106 -0
  116. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1288 -0
  117. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1010 -0
  118. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +236 -180
  119. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  120. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  121. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  122. diffusers/pipelines/audioldm2/modeling_audioldm2.py +58 -39
  123. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +121 -36
  124. diffusers/pipelines/aura_flow/__init__.py +48 -0
  125. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +584 -0
  126. diffusers/pipelines/auto_pipeline.py +196 -28
  127. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  128. diffusers/pipelines/blip_diffusion/modeling_blip2.py +6 -6
  129. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  130. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  131. diffusers/pipelines/cogvideo/__init__.py +54 -0
  132. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +772 -0
  133. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
  134. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +885 -0
  135. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +851 -0
  136. diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
  137. diffusers/pipelines/cogview3/__init__.py +47 -0
  138. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  139. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  140. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +6 -6
  141. diffusers/pipelines/controlnet/__init__.py +86 -80
  142. diffusers/pipelines/controlnet/multicontrolnet.py +7 -182
  143. diffusers/pipelines/controlnet/pipeline_controlnet.py +134 -87
  144. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  145. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +93 -77
  146. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +88 -197
  147. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +136 -90
  148. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +176 -80
  149. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +125 -89
  150. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  151. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  152. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  153. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  154. diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
  155. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1060 -0
  156. diffusers/pipelines/controlnet_sd3/__init__.py +57 -0
  157. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +1133 -0
  158. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  159. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  160. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +916 -0
  161. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1111 -0
  162. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  163. diffusers/pipelines/deepfloyd_if/pipeline_if.py +16 -30
  164. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +20 -35
  165. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +23 -41
  166. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +22 -38
  167. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +25 -41
  168. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +19 -34
  169. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  170. diffusers/pipelines/deepfloyd_if/watermark.py +1 -1
  171. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  172. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +70 -30
  173. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +48 -25
  174. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  175. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  176. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +21 -20
  177. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +27 -29
  178. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +33 -27
  179. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +33 -23
  180. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +36 -30
  181. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +102 -69
  182. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  183. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  184. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  185. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  186. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  187. diffusers/pipelines/dit/pipeline_dit.py +7 -4
  188. diffusers/pipelines/flux/__init__.py +69 -0
  189. diffusers/pipelines/flux/modeling_flux.py +47 -0
  190. diffusers/pipelines/flux/pipeline_flux.py +957 -0
  191. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  192. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  193. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  194. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
  195. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
  196. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
  197. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  198. diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
  199. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
  200. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  201. diffusers/pipelines/flux/pipeline_output.py +37 -0
  202. diffusers/pipelines/free_init_utils.py +41 -38
  203. diffusers/pipelines/free_noise_utils.py +596 -0
  204. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  205. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  206. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  207. diffusers/pipelines/hunyuandit/__init__.py +48 -0
  208. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +916 -0
  209. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  210. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  211. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +32 -29
  212. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  213. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  214. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  215. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  216. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +34 -31
  217. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  218. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  219. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  220. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  221. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  222. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  223. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  224. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +22 -35
  225. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +26 -37
  226. diffusers/pipelines/kolors/__init__.py +54 -0
  227. diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
  228. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1250 -0
  229. diffusers/pipelines/kolors/pipeline_output.py +21 -0
  230. diffusers/pipelines/kolors/text_encoder.py +889 -0
  231. diffusers/pipelines/kolors/tokenizer.py +338 -0
  232. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +82 -62
  233. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +77 -60
  234. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +12 -12
  235. diffusers/pipelines/latte/__init__.py +48 -0
  236. diffusers/pipelines/latte/pipeline_latte.py +881 -0
  237. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +80 -74
  238. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +85 -76
  239. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  240. diffusers/pipelines/ltx/__init__.py +50 -0
  241. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  242. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  243. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  244. diffusers/pipelines/lumina/__init__.py +48 -0
  245. diffusers/pipelines/lumina/pipeline_lumina.py +890 -0
  246. diffusers/pipelines/marigold/__init__.py +50 -0
  247. diffusers/pipelines/marigold/marigold_image_processing.py +576 -0
  248. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  249. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  250. diffusers/pipelines/mochi/__init__.py +48 -0
  251. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  252. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  253. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  254. diffusers/pipelines/pag/__init__.py +80 -0
  255. diffusers/pipelines/pag/pag_utils.py +243 -0
  256. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1328 -0
  257. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
  258. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1610 -0
  259. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
  260. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +969 -0
  261. diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
  262. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +865 -0
  263. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  264. diffusers/pipelines/pag/pipeline_pag_sd.py +1062 -0
  265. diffusers/pipelines/pag/pipeline_pag_sd_3.py +994 -0
  266. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  267. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +866 -0
  268. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
  269. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  270. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1345 -0
  271. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1544 -0
  272. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1776 -0
  273. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  274. diffusers/pipelines/pia/pipeline_pia.py +74 -164
  275. diffusers/pipelines/pipeline_flax_utils.py +5 -10
  276. diffusers/pipelines/pipeline_loading_utils.py +515 -53
  277. diffusers/pipelines/pipeline_utils.py +411 -222
  278. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  279. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +76 -93
  280. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +873 -0
  281. diffusers/pipelines/sana/__init__.py +47 -0
  282. diffusers/pipelines/sana/pipeline_output.py +21 -0
  283. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  284. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +27 -23
  285. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  286. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  287. diffusers/pipelines/shap_e/renderer.py +1 -1
  288. diffusers/pipelines/stable_audio/__init__.py +50 -0
  289. diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
  290. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +756 -0
  291. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +71 -25
  292. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  293. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +35 -34
  294. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  295. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +20 -11
  296. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  297. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  298. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  299. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +145 -79
  300. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +43 -28
  301. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  302. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +100 -68
  303. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +109 -201
  304. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +131 -32
  305. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +247 -87
  306. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +30 -29
  307. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +35 -27
  308. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +49 -42
  309. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  310. diffusers/pipelines/stable_diffusion_3/__init__.py +54 -0
  311. diffusers/pipelines/stable_diffusion_3/pipeline_output.py +21 -0
  312. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +1140 -0
  313. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +1036 -0
  314. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1250 -0
  315. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +29 -20
  316. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +59 -58
  317. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +31 -25
  318. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +38 -22
  319. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -24
  320. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -23
  321. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +107 -67
  322. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +316 -69
  323. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  324. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  325. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +98 -30
  326. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +121 -83
  327. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +161 -105
  328. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +142 -218
  329. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -29
  330. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  331. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  332. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +69 -39
  333. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +105 -74
  334. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  335. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +29 -49
  336. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +32 -93
  337. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +37 -25
  338. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +54 -40
  339. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  340. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  341. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  342. diffusers/pipelines/unidiffuser/modeling_uvit.py +12 -12
  343. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +29 -28
  344. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  345. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  346. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +6 -8
  347. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  348. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  349. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +15 -14
  350. diffusers/{models/dual_transformer_2d.py → quantizers/__init__.py} +2 -6
  351. diffusers/quantizers/auto.py +139 -0
  352. diffusers/quantizers/base.py +233 -0
  353. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  354. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
  355. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  356. diffusers/quantizers/gguf/__init__.py +1 -0
  357. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  358. diffusers/quantizers/gguf/utils.py +456 -0
  359. diffusers/quantizers/quantization_config.py +669 -0
  360. diffusers/quantizers/torchao/__init__.py +15 -0
  361. diffusers/quantizers/torchao/torchao_quantizer.py +292 -0
  362. diffusers/schedulers/__init__.py +12 -2
  363. diffusers/schedulers/deprecated/__init__.py +1 -1
  364. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  365. diffusers/schedulers/scheduling_amused.py +5 -5
  366. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  367. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  368. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
  369. diffusers/schedulers/scheduling_ddim.py +27 -26
  370. diffusers/schedulers/scheduling_ddim_cogvideox.py +452 -0
  371. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  372. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  373. diffusers/schedulers/scheduling_ddim_parallel.py +32 -31
  374. diffusers/schedulers/scheduling_ddpm.py +27 -30
  375. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  376. diffusers/schedulers/scheduling_ddpm_parallel.py +33 -36
  377. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  378. diffusers/schedulers/scheduling_deis_multistep.py +150 -50
  379. diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
  380. diffusers/schedulers/scheduling_dpmsolver_multistep.py +221 -84
  381. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  382. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +158 -52
  383. diffusers/schedulers/scheduling_dpmsolver_sde.py +153 -34
  384. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +275 -86
  385. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +81 -57
  386. diffusers/schedulers/scheduling_edm_euler.py +62 -39
  387. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +30 -29
  388. diffusers/schedulers/scheduling_euler_discrete.py +255 -74
  389. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +458 -0
  390. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +320 -0
  391. diffusers/schedulers/scheduling_heun_discrete.py +174 -46
  392. diffusers/schedulers/scheduling_ipndm.py +9 -9
  393. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +138 -29
  394. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +132 -26
  395. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  396. diffusers/schedulers/scheduling_lcm.py +23 -29
  397. diffusers/schedulers/scheduling_lms_discrete.py +105 -28
  398. diffusers/schedulers/scheduling_pndm.py +20 -20
  399. diffusers/schedulers/scheduling_repaint.py +21 -21
  400. diffusers/schedulers/scheduling_sasolver.py +157 -60
  401. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  402. diffusers/schedulers/scheduling_tcd.py +41 -36
  403. diffusers/schedulers/scheduling_unclip.py +19 -16
  404. diffusers/schedulers/scheduling_unipc_multistep.py +243 -47
  405. diffusers/schedulers/scheduling_utils.py +12 -5
  406. diffusers/schedulers/scheduling_utils_flax.py +1 -3
  407. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  408. diffusers/training_utils.py +214 -30
  409. diffusers/utils/__init__.py +17 -1
  410. diffusers/utils/constants.py +3 -0
  411. diffusers/utils/doc_utils.py +1 -0
  412. diffusers/utils/dummy_pt_objects.py +592 -7
  413. diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
  414. diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
  415. diffusers/utils/dummy_torch_and_transformers_objects.py +1001 -71
  416. diffusers/utils/dynamic_modules_utils.py +34 -29
  417. diffusers/utils/export_utils.py +50 -6
  418. diffusers/utils/hub_utils.py +131 -17
  419. diffusers/utils/import_utils.py +210 -8
  420. diffusers/utils/loading_utils.py +118 -5
  421. diffusers/utils/logging.py +4 -2
  422. diffusers/utils/peft_utils.py +37 -7
  423. diffusers/utils/state_dict_utils.py +13 -2
  424. diffusers/utils/testing_utils.py +193 -11
  425. diffusers/utils/torch_utils.py +4 -0
  426. diffusers/video_processor.py +113 -0
  427. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/METADATA +82 -91
  428. diffusers-0.32.2.dist-info/RECORD +550 -0
  429. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/WHEEL +1 -1
  430. diffusers/loaders/autoencoder.py +0 -146
  431. diffusers/loaders/controlnet.py +0 -136
  432. diffusers/loaders/lora.py +0 -1349
  433. diffusers/models/prior_transformer.py +0 -12
  434. diffusers/models/t5_film_transformer.py +0 -70
  435. diffusers/models/transformer_2d.py +0 -25
  436. diffusers/models/transformer_temporal.py +0 -34
  437. diffusers/models/unet_1d.py +0 -26
  438. diffusers/models/unet_1d_blocks.py +0 -203
  439. diffusers/models/unet_2d.py +0 -27
  440. diffusers/models/unet_2d_blocks.py +0 -375
  441. diffusers/models/unet_2d_condition.py +0 -25
  442. diffusers-0.27.1.dist-info/RECORD +0 -399
  443. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/LICENSE +0 -0
  444. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/entry_points.txt +0 -0
  445. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,163 @@
1
+ # Copyright 2024 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import copy
15
+ from typing import TYPE_CHECKING, Dict, List, Union
16
+
17
+ from ..utils import logging
18
+
19
+
20
+ if TYPE_CHECKING:
21
+ # import here to avoid circular imports
22
+ from ..models import UNet2DConditionModel
23
+
24
+ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
25
+
26
+
27
def _translate_into_actual_layer_name(name):
    """Translate a user-friendly layer name into the actual module path.

    Examples:
        ``"mid"``            -> ``"mid_block.attentions.0"``
        ``"down.block_1.0"`` -> ``"down_blocks.1.attentions.0"``
        ``"up.block_0.2"``   -> ``"up_blocks.0.attentions.2"``

    Parameters:
        name (`str`):
            Either ``"mid"`` or a dotted name with exactly three parts: ``"<down|up>.block_<i>.<j>"``.

    Returns:
        `str`: The translated layer name.

    Raises:
        ValueError: If `name` is not ``"mid"`` and does not consist of exactly three dot-separated parts.
    """
    if name == "mid":
        return "mid_block.attentions.0"

    parts = name.split(".")
    if len(parts) != 3:
        # Fail with a message that names the offending key instead of a bare tuple-unpacking error.
        raise ValueError(
            f"Invalid layer name {name!r}: expected 'mid' or a name of the form '<down|up>.block_<i>.<j>'."
        )
    updown, block, attn = parts

    updown = updown.replace("down", "down_blocks").replace("up", "up_blocks")
    block = block.replace("block_", "")
    attn = "attentions." + attn

    return ".".join((updown, block, attn))
39
+
40
+
41
def _maybe_expand_lora_scales(
    unet: "UNet2DConditionModel", weight_scales: List[Union[float, Dict]], default_scale=1.0
):
    """Expand the per-adapter LoRA scales of `unet` into per-layer scales.

    Parameters:
        unet (`UNet2DConditionModel`):
            Model whose `down_blocks`, `up_blocks`, `config.layers_per_block` and `state_dict()` are inspected.
        weight_scales (`List[Union[float, Dict]]`):
            One entry per adapter; each entry is handed to `_maybe_expand_lora_scales_for_one_adapter`.
        default_scale (`float`, defaults to 1.0):
            Scale forwarded as the default for layers an adapter entry does not mention.

    Returns:
        `list`: One expanded entry per element of `weight_scales`, in the same order.
    """
    # Only blocks exposing an `attentions` attribute are considered to carry transformer layers.
    blocks_with_transformer = {
        "down": [i for i, block in enumerate(unet.down_blocks) if hasattr(block, "attentions")],
        "up": [i for i, block in enumerate(unet.up_blocks) if hasattr(block, "attentions")],
    }
    transformer_per_block = {"down": unet.config.layers_per_block, "up": unet.config.layers_per_block + 1}

    # The state dict does not depend on the adapter, so take it once instead of once per adapter.
    state_dict = unet.state_dict()

    return [
        _maybe_expand_lora_scales_for_one_adapter(
            weight_for_adapter,
            blocks_with_transformer,
            transformer_per_block,
            state_dict,
            default_scale=default_scale,
        )
        for weight_for_adapter in weight_scales
    ]
62
+
63
+
64
def _maybe_expand_lora_scales_for_one_adapter(
    scales: Union[float, Dict],
    blocks_with_transformer: Dict[str, List[int]],
    transformer_per_block: Dict[str, int],
    state_dict: Dict,
    default_scale: float = 1.0,
):
    """
    Expands the inputs into a more granular dictionary. See the example below for more details.

    Parameters:
        scales (`Union[float, Dict]`):
            Scales dict to expand. Anything that is not a dict is returned unchanged. The input is never mutated.
        blocks_with_transformer (`Dict[str, List[int]]`):
            Dict with keys 'up' and 'down', listing the indices of the blocks that have transformer layers
        transformer_per_block (`Dict[str, int]`):
            Dict with keys 'up' and 'down', showing how many transformer layers each block has
        state_dict (`Dict`):
            Mapping whose keys are searched (substring match) to check that every expanded layer exists.
        default_scale (`float`, defaults to 1.0):
            Scale used for 'mid', 'up', 'down' or any individual block that `scales` does not mention.

    Returns:
        `scales` itself if it is not a dict, otherwise a dict mapping actual layer names (see
        `_translate_into_actual_layer_name`) to their scale.

    Raises:
        ValueError: If one of the two block dicts does not have exactly the keys 'down' and 'up', if a list of
            scales has the wrong length, or if an expanded layer is not found in `state_dict`.

    E.g. turns
    ```python
    scales = {"down": 2, "mid": 3, "up": {"block_0": 4, "block_1": [5, 6, 7]}}
    blocks_with_transformer = {"down": [1, 2], "up": [0, 1]}
    transformer_per_block = {"down": 2, "up": 3}
    ```
    into
    ```python
    {
        "down.block_1.0": 2,
        "down.block_1.1": 2,
        "down.block_2.0": 2,
        "down.block_2.1": 2,
        "mid": 3,
        "up.block_0.0": 4,
        "up.block_0.1": 4,
        "up.block_0.2": 4,
        "up.block_1.0": 5,
        "up.block_1.1": 6,
        "up.block_1.2": 7,
    }
    ```
    (the returned keys are then translated into actual layer names).
    """
    if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
        raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`")

    if sorted(transformer_per_block.keys()) != ["down", "up"]:
        raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")

    if not isinstance(scales, dict):
        # don't expand if scales is a single number
        return scales

    # Work on a private copy so the caller's dict is left untouched.
    scales = copy.deepcopy(scales)

    if "mid" not in scales:
        scales["mid"] = default_scale
    elif isinstance(scales["mid"], list):
        if len(scales["mid"]) == 1:
            scales["mid"] = scales["mid"][0]
        else:
            raise ValueError(f"Expected 1 scales for mid, got {len(scales['mid'])}.")

    for updown in ["up", "down"]:
        block_ids = blocks_with_transformer[updown]
        layers_in_block = transformer_per_block[updown]
        per_block = scales.get(updown, default_scale)

        # eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
        if not isinstance(per_block, dict):
            per_block = {f"block_{i}": copy.deepcopy(per_block) for i in block_ids}

        # eg {"down": {"block_1": 1}} to {"down": {"block_1": [1, 1]}}
        for i in block_ids:
            block = f"block_{i}"
            # set not assigned blocks to default scale
            if block not in per_block:
                per_block[block] = default_scale
            value = per_block[block]
            if not isinstance(value, list):
                per_block[block] = [value for _ in range(layers_in_block)]
            elif len(value) == 1:
                # a single-element list is broadcast to every transformer layer of the block
                per_block[block] = value * layers_in_block
            elif len(value) != layers_in_block:
                raise ValueError(f"Expected {layers_in_block} scales for {updown}.{block}, got {len(value)}.")

        # eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
        for i in block_ids:
            block = f"block_{i}"
            for tf_idx, value in enumerate(per_block[block]):
                scales[f"{updown}.{block}.{tf_idx}"] = value

        # The nested entry has been flattened into top-level keys; drop it.
        scales.pop(updown, None)

    expanded = {}
    for layer, weight in scales.items():
        # Translate once per layer and reuse the result for both the existence check and the output key.
        actual_name = _translate_into_actual_layer_name(layer)
        if not any(actual_name in module for module in state_dict):
            raise ValueError(
                f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or it has no attentions."
            )
        expanded[actual_name] = weight

    return expanded
@@ -27,17 +27,51 @@ _import_structure = {}
27
27
  if is_torch_available():
28
28
  _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"]
29
29
  _import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"]
30
+ _import_structure["autoencoders.autoencoder_dc"] = ["AutoencoderDC"]
30
31
  _import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"]
32
+ _import_structure["autoencoders.autoencoder_kl_allegro"] = ["AutoencoderKLAllegro"]
33
+ _import_structure["autoencoders.autoencoder_kl_cogvideox"] = ["AutoencoderKLCogVideoX"]
34
+ _import_structure["autoencoders.autoencoder_kl_hunyuan_video"] = ["AutoencoderKLHunyuanVideo"]
35
+ _import_structure["autoencoders.autoencoder_kl_ltx"] = ["AutoencoderKLLTXVideo"]
36
+ _import_structure["autoencoders.autoencoder_kl_mochi"] = ["AutoencoderKLMochi"]
31
37
  _import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"]
38
+ _import_structure["autoencoders.autoencoder_oobleck"] = ["AutoencoderOobleck"]
32
39
  _import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"]
33
40
  _import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"]
34
- _import_structure["controlnet"] = ["ControlNetModel"]
35
- _import_structure["dual_transformer_2d"] = ["DualTransformer2DModel"]
41
+ _import_structure["autoencoders.vq_model"] = ["VQModel"]
42
+ _import_structure["controlnets.controlnet"] = ["ControlNetModel"]
43
+ _import_structure["controlnets.controlnet_flux"] = ["FluxControlNetModel", "FluxMultiControlNetModel"]
44
+ _import_structure["controlnets.controlnet_hunyuan"] = [
45
+ "HunyuanDiT2DControlNetModel",
46
+ "HunyuanDiT2DMultiControlNetModel",
47
+ ]
48
+ _import_structure["controlnets.controlnet_sd3"] = ["SD3ControlNetModel", "SD3MultiControlNetModel"]
49
+ _import_structure["controlnets.controlnet_sparsectrl"] = ["SparseControlNetModel"]
50
+ _import_structure["controlnets.controlnet_union"] = ["ControlNetUnionModel"]
51
+ _import_structure["controlnets.controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"]
52
+ _import_structure["controlnets.multicontrolnet"] = ["MultiControlNetModel"]
36
53
  _import_structure["embeddings"] = ["ImageProjection"]
37
54
  _import_structure["modeling_utils"] = ["ModelMixin"]
55
+ _import_structure["transformers.auraflow_transformer_2d"] = ["AuraFlowTransformer2DModel"]
56
+ _import_structure["transformers.cogvideox_transformer_3d"] = ["CogVideoXTransformer3DModel"]
57
+ _import_structure["transformers.dit_transformer_2d"] = ["DiTTransformer2DModel"]
58
+ _import_structure["transformers.dual_transformer_2d"] = ["DualTransformer2DModel"]
59
+ _import_structure["transformers.hunyuan_transformer_2d"] = ["HunyuanDiT2DModel"]
60
+ _import_structure["transformers.latte_transformer_3d"] = ["LatteTransformer3DModel"]
61
+ _import_structure["transformers.lumina_nextdit2d"] = ["LuminaNextDiT2DModel"]
62
+ _import_structure["transformers.pixart_transformer_2d"] = ["PixArtTransformer2DModel"]
38
63
  _import_structure["transformers.prior_transformer"] = ["PriorTransformer"]
64
+ _import_structure["transformers.sana_transformer"] = ["SanaTransformer2DModel"]
65
+ _import_structure["transformers.stable_audio_transformer"] = ["StableAudioDiTModel"]
39
66
  _import_structure["transformers.t5_film_transformer"] = ["T5FilmDecoder"]
40
67
  _import_structure["transformers.transformer_2d"] = ["Transformer2DModel"]
68
+ _import_structure["transformers.transformer_allegro"] = ["AllegroTransformer3DModel"]
69
+ _import_structure["transformers.transformer_cogview3plus"] = ["CogView3PlusTransformer2DModel"]
70
+ _import_structure["transformers.transformer_flux"] = ["FluxTransformer2DModel"]
71
+ _import_structure["transformers.transformer_hunyuan_video"] = ["HunyuanVideoTransformer3DModel"]
72
+ _import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"]
73
+ _import_structure["transformers.transformer_mochi"] = ["MochiTransformer3DModel"]
74
+ _import_structure["transformers.transformer_sd3"] = ["SD3Transformer2DModel"]
41
75
  _import_structure["transformers.transformer_temporal"] = ["TransformerTemporalModel"]
42
76
  _import_structure["unets.unet_1d"] = ["UNet1DModel"]
43
77
  _import_structure["unets.unet_2d"] = ["UNet2DModel"]
@@ -49,10 +83,9 @@ if is_torch_available():
49
83
  _import_structure["unets.unet_spatio_temporal_condition"] = ["UNetSpatioTemporalConditionModel"]
50
84
  _import_structure["unets.unet_stable_cascade"] = ["StableCascadeUNet"]
51
85
  _import_structure["unets.uvit_2d"] = ["UVit2DModel"]
52
- _import_structure["vq_model"] = ["VQModel"]
53
86
 
54
87
  if is_flax_available():
55
- _import_structure["controlnet_flax"] = ["FlaxControlNetModel"]
88
+ _import_structure["controlnets.controlnet_flax"] = ["FlaxControlNetModel"]
56
89
  _import_structure["unets.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
57
90
  _import_structure["vae_flax"] = ["FlaxAutoencoderKL"]
58
91
 
@@ -62,17 +95,54 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
62
95
  from .adapter import MultiAdapter, T2IAdapter
63
96
  from .autoencoders import (
64
97
  AsymmetricAutoencoderKL,
98
+ AutoencoderDC,
65
99
  AutoencoderKL,
100
+ AutoencoderKLAllegro,
101
+ AutoencoderKLCogVideoX,
102
+ AutoencoderKLHunyuanVideo,
103
+ AutoencoderKLLTXVideo,
104
+ AutoencoderKLMochi,
66
105
  AutoencoderKLTemporalDecoder,
106
+ AutoencoderOobleck,
67
107
  AutoencoderTiny,
68
108
  ConsistencyDecoderVAE,
109
+ VQModel,
110
+ )
111
+ from .controlnets import (
112
+ ControlNetModel,
113
+ ControlNetUnionModel,
114
+ ControlNetXSAdapter,
115
+ FluxControlNetModel,
116
+ FluxMultiControlNetModel,
117
+ HunyuanDiT2DControlNetModel,
118
+ HunyuanDiT2DMultiControlNetModel,
119
+ MultiControlNetModel,
120
+ SD3ControlNetModel,
121
+ SD3MultiControlNetModel,
122
+ SparseControlNetModel,
123
+ UNetControlNetXSModel,
69
124
  )
70
- from .controlnet import ControlNetModel
71
125
  from .embeddings import ImageProjection
72
126
  from .modeling_utils import ModelMixin
73
127
  from .transformers import (
128
+ AllegroTransformer3DModel,
129
+ AuraFlowTransformer2DModel,
130
+ CogVideoXTransformer3DModel,
131
+ CogView3PlusTransformer2DModel,
132
+ DiTTransformer2DModel,
74
133
  DualTransformer2DModel,
134
+ FluxTransformer2DModel,
135
+ HunyuanDiT2DModel,
136
+ HunyuanVideoTransformer3DModel,
137
+ LatteTransformer3DModel,
138
+ LTXVideoTransformer3DModel,
139
+ LuminaNextDiT2DModel,
140
+ MochiTransformer3DModel,
141
+ PixArtTransformer2DModel,
75
142
  PriorTransformer,
143
+ SanaTransformer2DModel,
144
+ SD3Transformer2DModel,
145
+ StableAudioDiTModel,
76
146
  T5FilmDecoder,
77
147
  Transformer2DModel,
78
148
  TransformerTemporalModel,
@@ -90,10 +160,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
90
160
  UNetSpatioTemporalConditionModel,
91
161
  UVit2DModel,
92
162
  )
93
- from .vq_model import VQModel
94
163
 
95
164
  if is_flax_available():
96
- from .controlnet_flax import FlaxControlNetModel
165
+ from .controlnets import FlaxControlNetModel
97
166
  from .unets import FlaxUNet2DConditionModel
98
167
  from .vae_flax import FlaxAutoencoderKL
99
168
 
@@ -18,8 +18,12 @@ import torch.nn.functional as F
18
18
  from torch import nn
19
19
 
20
20
  from ..utils import deprecate
21
+ from ..utils.import_utils import is_torch_npu_available, is_torch_version
21
22
 
22
23
 
24
+ if is_torch_npu_available():
25
+ import torch_npu
26
+
23
27
  ACTIVATION_FUNCTIONS = {
24
28
  "swish": nn.SiLU(),
25
29
  "silu": nn.SiLU(),
@@ -46,6 +50,18 @@ def get_activation(act_fn: str) -> nn.Module:
46
50
  raise ValueError(f"Unsupported activation function: {act_fn}")
47
51
 
48
52
 
53
class FP32SiLU(nn.Module):
    r"""
    SiLU activation evaluated in `torch.float32`; the result is cast back to the dtype of the input.
    """

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        # Upcast, apply SiLU out of place, then restore the caller's dtype.
        upcast = inputs.float()
        activated = F.silu(upcast)
        return activated.to(inputs.dtype)
63
+
64
+
49
65
  class GELU(nn.Module):
50
66
  r"""
51
67
  GELU activation function with tanh approximation support with `approximate="tanh"`.
@@ -63,10 +79,10 @@ class GELU(nn.Module):
63
79
  self.approximate = approximate
64
80
 
65
81
  def gelu(self, gate: torch.Tensor) -> torch.Tensor:
66
- if gate.device.type != "mps":
67
- return F.gelu(gate, approximate=self.approximate)
68
- # mps: gelu is not implemented for float16
69
- return F.gelu(gate.to(dtype=torch.float32), approximate=self.approximate).to(dtype=gate.dtype)
82
+ if gate.device.type == "mps" and is_torch_version("<", "2.0.0"):
83
+ # fp16 gelu not supported on mps before torch 2.0
84
+ return F.gelu(gate.to(dtype=torch.float32), approximate=self.approximate).to(dtype=gate.dtype)
85
+ return F.gelu(gate, approximate=self.approximate)
70
86
 
71
87
  def forward(self, hidden_states):
72
88
  hidden_states = self.proj(hidden_states)
@@ -89,18 +105,45 @@ class GEGLU(nn.Module):
89
105
  self.proj = nn.Linear(dim_in, dim_out * 2, bias=bias)
90
106
 
91
107
  def gelu(self, gate: torch.Tensor) -> torch.Tensor:
92
- if gate.device.type != "mps":
93
- return F.gelu(gate)
94
- # mps: gelu is not implemented for float16
95
- return F.gelu(gate.to(dtype=torch.float32)).to(dtype=gate.dtype)
108
+ if gate.device.type == "mps" and is_torch_version("<", "2.0.0"):
109
+ # fp16 gelu not supported on mps before torch 2.0
110
+ return F.gelu(gate.to(dtype=torch.float32)).to(dtype=gate.dtype)
111
+ return F.gelu(gate)
96
112
 
97
113
  def forward(self, hidden_states, *args, **kwargs):
98
114
  if len(args) > 0 or kwargs.get("scale", None) is not None:
99
115
  deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`."
100
116
  deprecate("scale", "1.0.0", deprecation_message)
117
+ hidden_states = self.proj(hidden_states)
118
+ if is_torch_npu_available():
119
+ # using torch_npu.npu_geglu can run faster and save memory on NPU.
120
+ return torch_npu.npu_geglu(hidden_states, dim=-1, approximate=1)[0]
121
+ else:
122
+ hidden_states, gate = hidden_states.chunk(2, dim=-1)
123
+ return hidden_states * self.gelu(gate)
101
124
 
102
- hidden_states, gate = self.proj(hidden_states).chunk(2, dim=-1)
103
- return hidden_states * self.gelu(gate)
125
+
126
class SwiGLU(nn.Module):
    r"""
    Gated linear unit [variant](https://arxiv.org/abs/2002.05202) that gates with SiLU / Swish where `GEGLU` gates
    with GeLU.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(self, dim_in: int, dim_out: int, bias: bool = True):
        super().__init__()

        # A single projection yields both halves (value and gate), hence twice the output width.
        self.proj = nn.Linear(dim_in, 2 * dim_out, bias=bias)
        self.activation = nn.SiLU()

    def forward(self, hidden_states):
        # Split the projection along the channel axis into the value half and the gate half.
        value, gate = self.proj(hidden_states).chunk(2, dim=-1)
        return value * self.activation(gate)
104
147
 
105
148
 
106
149
  class ApproximateGELU(nn.Module):
@@ -121,3 +164,15 @@ class ApproximateGELU(nn.Module):
121
164
  def forward(self, x: torch.Tensor) -> torch.Tensor:
122
165
  x = self.proj(x)
123
166
  return x * torch.sigmoid(1.702 * x)
167
+
168
+
169
class LinearActivation(nn.Module):
    r"""
    A linear projection followed by an activation that is looked up by name with `get_activation`.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
        activation (`str`, defaults to `"silu"`): Name of the activation, passed to `get_activation`.
    """

    def __init__(self, dim_in: int, dim_out: int, bias: bool = True, activation: str = "silu"):
        super().__init__()

        self.proj = nn.Linear(dim_in, dim_out, bias=bias)
        self.activation = get_activation(activation)

    def forward(self, hidden_states):
        # Project first, then apply the configured activation.
        return self.activation(self.proj(hidden_states))
+ return self.activation(hidden_states)
@@ -30,10 +30,10 @@ class MultiAdapter(ModelMixin):
30
30
  MultiAdapter is a wrapper model that contains multiple adapter models and merges their outputs according to
31
31
  user-assigned weighting.
32
32
 
33
- This model inherits from [`ModelMixin`]. Check the superclass documentation for the generic methods the library
34
- implements for all the model (such as downloading or saving, etc.)
33
+ This model inherits from [`ModelMixin`]. Check the superclass documentation for common methods such as downloading
34
+ or saving.
35
35
 
36
- Parameters:
36
+ Args:
37
37
  adapters (`List[T2IAdapter]`, *optional*, defaults to None):
38
38
  A list of `T2IAdapter` model instances.
39
39
  """
@@ -77,11 +77,13 @@ class MultiAdapter(ModelMixin):
77
77
  r"""
78
78
  Args:
79
79
  xs (`torch.Tensor`):
80
- (batch, channel, height, width) input images for multiple adapter models concated along dimension 1,
81
- `channel` should equal to `num_adapter` * "number of channel of image".
80
+ A tensor of shape (batch, channel, height, width) representing input images for multiple adapter
81
+ models, concatenated along dimension 1 (channel dimension). The `channel` dimension should be equal to
82
+ `num_adapter` * number of channels per image.
83
+
82
84
  adapter_weights (`List[float]`, *optional*, defaults to None):
83
- List of floats representing the weight which will be multiply to each adapter's output before adding
84
- them together.
85
+ A list of floats representing the weights which will be multiplied by each adapter's output before
86
+ summing them together. If `None`, equal weights will be used for all adapters.
85
87
  """
86
88
  if adapter_weights is None:
87
89
  adapter_weights = torch.tensor([1 / self.num_adapter] * self.num_adapter)
@@ -109,24 +111,24 @@ class MultiAdapter(ModelMixin):
109
111
  variant: Optional[str] = None,
110
112
  ):
111
113
  """
112
- Save a model and its configuration file to a directory, so that it can be re-loaded using the
114
+ Save a model and its configuration file to a specified directory, allowing it to be re-loaded with the
113
115
  `[`~models.adapter.MultiAdapter.from_pretrained`]` class method.
114
116
 
115
- Arguments:
117
+ Args:
116
118
  save_directory (`str` or `os.PathLike`):
117
- Directory to which to save. Will be created if it doesn't exist.
118
- is_main_process (`bool`, *optional*, defaults to `True`):
119
- Whether the process calling this is the main process or not. Useful when in distributed training like
120
- TPUs and need to call this function on all processes. In this case, set `is_main_process=True` only on
121
- the main process to avoid race conditions.
119
+ The directory where the model will be saved. If the directory does not exist, it will be created.
120
+ is_main_process (`bool`, optional, defaults=True):
121
+ Indicates whether the current process is the main process. Useful for distributed training (e.g.,
122
+ TPUs) when this function must be called on all processes. In this case, set `is_main_process=True` only
123
+ for the main process to avoid race conditions.
122
124
  save_function (`Callable`):
123
- The function to use to save the state dictionary. Useful on distributed training like TPUs when one
124
- need to replace `torch.save` by another method. Can be configured with the environment variable
125
- `DIFFUSERS_SAVE_MODE`.
126
- safe_serialization (`bool`, *optional*, defaults to `True`):
127
- Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
125
+ Function used to save the state dictionary. Useful for distributed training (e.g., TPUs) to replace
126
+ `torch.save` with another method. Can also be configured using the `DIFFUSERS_SAVE_MODE` environment
127
+ variable.
128
+ safe_serialization (`bool`, optional, defaults=True):
129
+ If `True`, save the model using `safetensors`. If `False`, save the model with `pickle`.
128
130
  variant (`str`, *optional*):
129
- If specified, weights are saved in the format pytorch_model.<variant>.bin.
131
+ If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
130
132
  """
131
133
  idx = 0
132
134
  model_path_to_save = save_directory
@@ -145,19 +147,17 @@ class MultiAdapter(ModelMixin):
145
147
  @classmethod
146
148
  def from_pretrained(cls, pretrained_model_path: Optional[Union[str, os.PathLike]], **kwargs):
147
149
  r"""
148
- Instantiate a pretrained MultiAdapter model from multiple pre-trained adapter models.
150
+ Instantiate a pretrained `MultiAdapter` model from multiple pre-trained adapter models.
149
151
 
150
152
  The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train
151
- the model, you should first set it back in training mode with `model.train()`.
153
+ the model, set it back to training mode using `model.train()`.
152
154
 
153
- The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come
154
- pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning
155
- task.
155
+ Warnings:
156
+ *Weights from XXX not initialized from pretrained model* means that the weights of XXX are not pretrained
157
+ with the rest of the model. It is up to you to train those weights with a downstream fine-tuning. *Weights
158
+ from XXX not used in YYY* means that the layer XXX is not used by YYY, so those weights are discarded.
156
159
 
157
- The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those
158
- weights are discarded.
159
-
160
- Parameters:
160
+ Args:
161
161
  pretrained_model_path (`os.PathLike`):
162
162
  A path to a *directory* containing model weights saved using
163
163
  [`~diffusers.models.adapter.MultiAdapter.save_pretrained`], e.g., `./my_model_directory/adapter`.
@@ -175,20 +175,20 @@ class MultiAdapter(ModelMixin):
175
175
  more information about each option see [designing a device
176
176
  map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
177
177
  max_memory (`Dict`, *optional*):
178
- A dictionary device identifier to maximum memory. Will default to the maximum memory available for each
179
- GPU and the available CPU RAM if unset.
178
+ A dictionary mapping device identifiers to their maximum memory. Defaults to the maximum memory
179
+ available for each GPU and the available CPU RAM if unset.
180
180
  low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
181
181
  Speed up model loading by not initializing the weights and only loading the pre-trained weights. This
182
182
  also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the
183
183
  model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch,
184
184
  setting this argument to `True` will raise an error.
185
185
  variant (`str`, *optional*):
186
- If specified load weights from `variant` filename, *e.g.* pytorch_model.<variant>.bin. `variant` is
187
- ignored when using `from_flax`.
186
+ If specified, load weights from a `variant` file (*e.g.* pytorch_model.<variant>.bin). `variant` will
187
+ be ignored when using `from_flax`.
188
188
  use_safetensors (`bool`, *optional*, defaults to `None`):
189
- If set to `None`, the `safetensors` weights will be downloaded if they're available **and** if the
190
- `safetensors` library is installed. If set to `True`, the model will be forcibly loaded from
191
- `safetensors` weights. If set to `False`, loading will *not* use `safetensors`.
189
+ If `None`, the `safetensors` weights will be downloaded if available **and** if the `safetensors` library is
190
+ installed. If `True`, the model will be forcibly loaded from `safetensors` weights. If `False`,
191
+ `safetensors` is not used.
192
192
  """
193
193
  idx = 0
194
194
  adapters = []
@@ -223,22 +223,22 @@ class T2IAdapter(ModelMixin, ConfigMixin):
223
223
  and
224
224
  [AdapterLight](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L235).
225
225
 
226
- This model inherits from [`ModelMixin`]. Check the superclass documentation for the generic methods the library
227
- implements for all the model (such as downloading or saving, etc.)
226
+ This model inherits from [`ModelMixin`]. Check the superclass documentation for the common methods, such as
227
+ downloading or saving.
228
228
 
229
- Parameters:
230
- in_channels (`int`, *optional*, defaults to 3):
231
- Number of channels of Aapter's input(*control image*). Set this parameter to 1 if you're using gray scale
232
- image as *control image*.
229
+ Args:
230
+ in_channels (`int`, *optional*, defaults to `3`):
231
+ The number of channels in the adapter's input (*control image*). Set it to 1 if you're using a gray scale
232
+ image.
233
233
  channels (`List[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
234
- The number of channel of each downsample block's output hidden state. The `len(block_out_channels)` will
235
- also determine the number of downsample blocks in the Adapter.
236
- num_res_blocks (`int`, *optional*, defaults to 2):
234
+ The number of channels in each downsample block's output hidden state. The `len(block_out_channels)`
235
+ determines the number of downsample blocks in the adapter.
236
+ num_res_blocks (`int`, *optional*, defaults to `2`):
237
237
  Number of ResNet blocks in each downsample block.
238
- downscale_factor (`int`, *optional*, defaults to 8):
238
+ downscale_factor (`int`, *optional*, defaults to `8`):
239
239
  A factor that determines the total downscale factor of the Adapter.
240
240
  adapter_type (`str`, *optional*, defaults to `full_adapter`):
241
- The type of Adapter to use. Choose either `full_adapter` or `full_adapter_xl` or `light_adapter`.
241
+ Adapter type (`full_adapter` or `full_adapter_xl` or `light_adapter`) to use.
242
242
  """
243
243
 
244
244
  @register_to_config
@@ -393,7 +393,7 @@ class AdapterBlock(nn.Module):
393
393
  An AdapterBlock is a helper model that contains multiple ResNet-like blocks. It is used in the `FullAdapter` and
394
394
  `FullAdapterXL` models.
395
395
 
396
- Parameters:
396
+ Args:
397
397
  in_channels (`int`):
398
398
  Number of channels of AdapterBlock's input.
399
399
  out_channels (`int`):
@@ -401,7 +401,7 @@ class AdapterBlock(nn.Module):
401
401
  num_res_blocks (`int`):
402
402
  Number of ResNet blocks in the AdapterBlock.
403
403
  down (`bool`, *optional*, defaults to `False`):
404
- Whether to perform downsampling on AdapterBlock's input.
404
+ If `True`, perform downsampling on AdapterBlock's input.
405
405
  """
406
406
 
407
407
  def __init__(self, in_channels: int, out_channels: int, num_res_blocks: int, down: bool = False):
@@ -440,7 +440,7 @@ class AdapterResnetBlock(nn.Module):
440
440
  r"""
441
441
  An `AdapterResnetBlock` is a helper model that implements a ResNet-like block.
442
442
 
443
- Parameters:
443
+ Args:
444
444
  channels (`int`):
445
445
  Number of channels of AdapterResnetBlock's input and output.
446
446
  """
@@ -518,7 +518,7 @@ class LightAdapterBlock(nn.Module):
518
518
  A `LightAdapterBlock` is a helper model that contains multiple `LightAdapterResnetBlocks`. It is used in the
519
519
  `LightAdapter` model.
520
520
 
521
- Parameters:
521
+ Args:
522
522
  in_channels (`int`):
523
523
  Number of channels of LightAdapterBlock's input.
524
524
  out_channels (`int`):
@@ -526,7 +526,7 @@ class LightAdapterBlock(nn.Module):
526
526
  num_res_blocks (`int`):
527
527
  Number of LightAdapterResnetBlocks in the LightAdapterBlock.
528
528
  down (`bool`, *optional*, defaults to `False`):
529
- Whether to perform downsampling on LightAdapterBlock's input.
529
+ If `True`, perform downsampling on LightAdapterBlock's input.
530
530
  """
531
531
 
532
532
  def __init__(self, in_channels: int, out_channels: int, num_res_blocks: int, down: bool = False):
@@ -561,7 +561,7 @@ class LightAdapterResnetBlock(nn.Module):
561
561
  A `LightAdapterResnetBlock` is a helper model that implements a ResNet-like block with a slightly different
562
562
  architecture than `AdapterResnetBlock`.
563
563
 
564
- Parameters:
564
+ Args:
565
565
  channels (`int`):
566
566
  Number of channels of LightAdapterResnetBlock's input and output.
567
567
  """