diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +198 -28
  13. diffusers/loaders/lora_conversion_utils.py +679 -44
  14. diffusers/loaders/lora_pipeline.py +1963 -801
  15. diffusers/loaders/peft.py +169 -84
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +653 -75
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +22 -32
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +10 -2
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +14 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.1.dist-info/RECORD +0 -550
  387. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
diffusers/loaders/textual_inversion.py CHANGED
@@ -40,7 +40,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
     force_download = kwargs.pop("force_download", False)
     proxies = kwargs.pop("proxies", None)
     local_files_only = kwargs.pop("local_files_only", None)
-    token = kwargs.pop("token", None)
+    hf_token = kwargs.pop("hf_token", None)
     revision = kwargs.pop("revision", None)
     subfolder = kwargs.pop("subfolder", None)
     weight_name = kwargs.pop("weight_name", None)
@@ -73,7 +73,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
                 force_download=force_download,
                 proxies=proxies,
                 local_files_only=local_files_only,
-                token=token,
+                token=hf_token,
                 revision=revision,
                 subfolder=subfolder,
                 user_agent=user_agent,
@@ -93,7 +93,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
                 force_download=force_download,
                 proxies=proxies,
                 local_files_only=local_files_only,
-                token=token,
+                token=hf_token,
                 revision=revision,
                 subfolder=subfolder,
                 user_agent=user_agent,
@@ -312,7 +312,7 @@ class TextualInversionLoaderMixin:
             local_files_only (`bool`, *optional*, defaults to `False`):
                 Whether to only load local model weights and configuration files or not. If set to `True`, the model
                 won't be downloaded from the Hub.
-            token (`str` or *bool*, *optional*):
+            hf_token (`str` or *bool*, *optional*):
                 The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
                 `diffusers-cli login` (stored in `~/.huggingface`) is used.
             revision (`str`, *optional*, defaults to `"main"`):
@@ -333,7 +333,7 @@ class TextualInversionLoaderMixin:
         from diffusers import StableDiffusionPipeline
         import torch

-        model_id = "runwayml/stable-diffusion-v1-5"
+        model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
         pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

         pipe.load_textual_inversion("sd-concepts-library/cat-toy")
@@ -352,7 +352,7 @@ class TextualInversionLoaderMixin:
         from diffusers import StableDiffusionPipeline
         import torch

-        model_id = "runwayml/stable-diffusion-v1-5"
+        model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
         pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

         pipe.load_textual_inversion("./charturnerv2.pt", token="charturnerv2")
@@ -449,9 +449,9 @@ class TextualInversionLoaderMixin:

         # 7.5 Offload the model again
         if is_model_cpu_offload:
-            self.enable_model_cpu_offload()
+            self.enable_model_cpu_offload(device=device)
         elif is_sequential_cpu_offload:
-            self.enable_sequential_cpu_offload()
+            self.enable_sequential_cpu_offload(device=device)

         # / Unsafe Code >

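Review note: after `load_textual_inversion` temporarily disables offloading to patch the text encoder, it now re-enables it on the pipeline's original device instead of silently falling back to the default `cuda:0`. A minimal sketch of the user-visible contract, assuming a machine with a second GPU (the `device=` argument to `enable_model_cpu_offload` is existing public API):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)
# Offload relative to a non-default accelerator; the fix above keeps this
# device when offloading is re-enabled after load/unload round-trips.
pipe.enable_model_cpu_offload(device="cuda:1")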
@@ -469,7 +469,7 @@ class TextualInversionLoaderMixin:
         from diffusers import AutoPipelineForText2Image
         import torch

-        pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+        pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")

         # Example 1
         pipeline.load_textual_inversion("sd-concepts-library/gta5-artwork")
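Review note: the authorization kwarg is renamed from `token` to `hf_token` throughout the textual-inversion loader, which removes the collision with the `token` argument that names the placeholder embedding (see the `token="charturnerv2"` call above). A minimal sketch of a call that needs both, assuming a private Hub repo (`my-org/private-concept` is hypothetical):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# `token` names the placeholder string; `hf_token` now carries Hub authorization.
pipe.load_textual_inversion(
    "my-org/private-concept",  # hypothetical private repo
    token="<my-concept>",
    hf_token="hf_...",
)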
diffusers/loaders/transformer_flux.py CHANGED
@@ -17,7 +17,7 @@ from ..models.embeddings import (
     ImageProjection,
     MultiIPAdapterImageProjection,
 )
-from ..models.modeling_utils import load_model_dict_into_meta
+from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
 from ..utils import (
     is_accelerate_available,
     is_torch_version,
@@ -36,7 +36,7 @@ class FluxTransformer2DLoadersMixin:
     Load layers into a [`FluxTransformer2DModel`].
     """

-    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if low_cpu_mem_usage:
             if is_accelerate_available():
                 from accelerate import init_empty_weights
@@ -82,11 +82,12 @@ class FluxTransformer2DLoadersMixin:
         if not low_cpu_mem_usage:
             image_projection.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(image_projection, updated_state_dict, device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)

         return image_projection

-    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         from ..models.attention_processor import (
             FluxIPAdapterJointAttnProcessor2_0,
         )
@@ -151,15 +152,15 @@ class FluxTransformer2DLoadersMixin:
             if not low_cpu_mem_usage:
                 attn_procs[name].load_state_dict(value_dict)
             else:
-                device = self.device
+                device_map = {"": self.device}
                 dtype = self.dtype
-                load_model_dict_into_meta(attn_procs[name], value_dict, device=device, dtype=dtype)
+                load_model_dict_into_meta(attn_procs[name], value_dict, device_map=device_map, dtype=dtype)

             key_id += 1

         return attn_procs

-    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=False):
+    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if not isinstance(state_dicts, list):
             state_dicts = [state_dicts]

@@ -177,5 +178,3 @@ class FluxTransformer2DLoadersMixin:

         self.encoder_hid_proj = MultiIPAdapterImageProjection(image_projection_layers)
         self.config.encoder_hid_dim_type = "ip_image_proj"
-
-        self.to(dtype=self.dtype, device=self.device)
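Review note: `load_model_dict_into_meta` now takes a `device_map` instead of a bare `device`; `{"": device}` is the accelerate convention for "map the whole module to this one device". A minimal sketch of the meta-init-then-materialize pattern these helpers rely on, assuming `accelerate` is installed (`MyProj` and its dimensions are hypothetical stand-ins):

import torch
import torch.nn as nn
from accelerate import init_empty_weights
from accelerate.utils import set_module_tensor_to_device

class MyProj(nn.Module):  # hypothetical stand-in for an image-projection head
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(768, 768)

# Build the module on the meta device: parameters exist but own no storage.
with init_empty_weights():
    module = MyProj()

# Materialize each tensor directly at its target device/dtype, which is roughly
# what load_model_dict_into_meta does under device_map={"": device}.
state_dict = {"proj.weight": torch.randn(768, 768), "proj.bias": torch.zeros(768)}
for name, value in state_dict.items():
    set_module_tensor_to_device(module, name, device="cpu", value=value.to(torch.float16))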
diffusers/loaders/transformer_sd3.py CHANGED
@@ -11,79 +11,160 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from contextlib import nullcontext
 from typing import Dict

 from ..models.attention_processor import SD3IPAdapterJointAttnProcessor2_0
 from ..models.embeddings import IPAdapterTimeImageProjection
 from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
+from ..utils import is_accelerate_available, is_torch_version, logging
+
+
+logger = logging.get_logger(__name__)


 class SD3Transformer2DLoadersMixin:
     """Load IP-Adapters and LoRA layers into a `[SD3Transformer2DModel]`."""

-    def _load_ip_adapter_weights(self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT) -> None:
-        """Sets IP-Adapter attention processors, image projection, and loads state_dict.
+    def _convert_ip_adapter_attn_to_diffusers(
+        self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT
+    ) -> Dict:
+        if low_cpu_mem_usage:
+            if is_accelerate_available():
+                from accelerate import init_empty_weights
+
+            else:
+                low_cpu_mem_usage = False
+                logger.warning(
+                    "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the"
+                    " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install"
+                    " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip"
+                    " install accelerate\n```\n."
+                )
+
+        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError(
+                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
+                " `low_cpu_mem_usage=False`."
+            )

-        Args:
-            state_dict (`Dict`):
-                State dict with keys "ip_adapter", which contains parameters for attention processors, and
-                "image_proj", which contains parameters for image projection net.
-            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-                argument to `True` will raise an error.
-        """
         # IP-Adapter cross attention parameters
         hidden_size = self.config.attention_head_dim * self.config.num_attention_heads
         ip_hidden_states_dim = self.config.attention_head_dim * self.config.num_attention_heads
-        timesteps_emb_dim = state_dict["ip_adapter"]["0.norm_ip.linear.weight"].shape[1]
+        timesteps_emb_dim = state_dict["0.norm_ip.linear.weight"].shape[1]

         # Dict where key is transformer layer index, value is attention processor's state dict
         # ip_adapter state dict keys example: "0.norm_ip.linear.weight"
         layer_state_dict = {idx: {} for idx in range(len(self.attn_processors))}
-        for key, weights in state_dict["ip_adapter"].items():
+        for key, weights in state_dict.items():
             idx, name = key.split(".", maxsplit=1)
             layer_state_dict[int(idx)][name] = weights

-        # Create IP-Adapter attention processor
+        # Create IP-Adapter attention processor & load state_dict
         attn_procs = {}
+        init_context = init_empty_weights if low_cpu_mem_usage else nullcontext
         for idx, name in enumerate(self.attn_processors.keys()):
-            attn_procs[name] = SD3IPAdapterJointAttnProcessor2_0(
-                hidden_size=hidden_size,
-                ip_hidden_states_dim=ip_hidden_states_dim,
-                head_dim=self.config.attention_head_dim,
-                timesteps_emb_dim=timesteps_emb_dim,
-            ).to(self.device, dtype=self.dtype)
+            with init_context():
+                attn_procs[name] = SD3IPAdapterJointAttnProcessor2_0(
+                    hidden_size=hidden_size,
+                    ip_hidden_states_dim=ip_hidden_states_dim,
+                    head_dim=self.config.attention_head_dim,
+                    timesteps_emb_dim=timesteps_emb_dim,
+                )

             if not low_cpu_mem_usage:
                 attn_procs[name].load_state_dict(layer_state_dict[idx], strict=True)
             else:
+                device_map = {"": self.device}
                 load_model_dict_into_meta(
-                    attn_procs[name], layer_state_dict[idx], device=self.device, dtype=self.dtype
+                    attn_procs[name], layer_state_dict[idx], device_map=device_map, dtype=self.dtype
                 )

-        self.set_attn_processor(attn_procs)
+        return attn_procs
+
+    def _convert_ip_adapter_image_proj_to_diffusers(
+        self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT
+    ) -> IPAdapterTimeImageProjection:
+        if low_cpu_mem_usage:
+            if is_accelerate_available():
+                from accelerate import init_empty_weights
+
+            else:
+                low_cpu_mem_usage = False
+                logger.warning(
+                    "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the"
+                    " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install"
+                    " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip"
+                    " install accelerate\n```\n."
+                )
+
+        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError(
+                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
+                " `low_cpu_mem_usage=False`."
+            )
+
+        init_context = init_empty_weights if low_cpu_mem_usage else nullcontext
+
+        # Convert to diffusers
+        updated_state_dict = {}
+        for key, value in state_dict.items():
+            # InstantX/SD3.5-Large-IP-Adapter
+            if key.startswith("layers."):
+                idx = key.split(".")[1]
+                key = key.replace(f"layers.{idx}.0.norm1", f"layers.{idx}.ln0")
+                key = key.replace(f"layers.{idx}.0.norm2", f"layers.{idx}.ln1")
+                key = key.replace(f"layers.{idx}.0.to_q", f"layers.{idx}.attn.to_q")
+                key = key.replace(f"layers.{idx}.0.to_kv", f"layers.{idx}.attn.to_kv")
+                key = key.replace(f"layers.{idx}.0.to_out", f"layers.{idx}.attn.to_out.0")
+                key = key.replace(f"layers.{idx}.1.0", f"layers.{idx}.adaln_norm")
+                key = key.replace(f"layers.{idx}.1.1", f"layers.{idx}.ff.net.0.proj")
+                key = key.replace(f"layers.{idx}.1.3", f"layers.{idx}.ff.net.2")
+                key = key.replace(f"layers.{idx}.2.1", f"layers.{idx}.adaln_proj")
+            updated_state_dict[key] = value

         # Image projetion parameters
-        embed_dim = state_dict["image_proj"]["proj_in.weight"].shape[1]
-        output_dim = state_dict["image_proj"]["proj_out.weight"].shape[0]
-        hidden_dim = state_dict["image_proj"]["proj_in.weight"].shape[0]
-        heads = state_dict["image_proj"]["layers.0.attn.to_q.weight"].shape[0] // 64
-        num_queries = state_dict["image_proj"]["latents"].shape[1]
-        timestep_in_dim = state_dict["image_proj"]["time_embedding.linear_1.weight"].shape[1]
+        embed_dim = updated_state_dict["proj_in.weight"].shape[1]
+        output_dim = updated_state_dict["proj_out.weight"].shape[0]
+        hidden_dim = updated_state_dict["proj_in.weight"].shape[0]
+        heads = updated_state_dict["layers.0.attn.to_q.weight"].shape[0] // 64
+        num_queries = updated_state_dict["latents"].shape[1]
+        timestep_in_dim = updated_state_dict["time_embedding.linear_1.weight"].shape[1]

         # Image projection
-        self.image_proj = IPAdapterTimeImageProjection(
-            embed_dim=embed_dim,
-            output_dim=output_dim,
-            hidden_dim=hidden_dim,
-            heads=heads,
-            num_queries=num_queries,
-            timestep_in_dim=timestep_in_dim,
-        ).to(device=self.device, dtype=self.dtype)
+        with init_context():
+            image_proj = IPAdapterTimeImageProjection(
+                embed_dim=embed_dim,
+                output_dim=output_dim,
+                hidden_dim=hidden_dim,
+                heads=heads,
+                num_queries=num_queries,
+                timestep_in_dim=timestep_in_dim,
+            )

         if not low_cpu_mem_usage:
-            self.image_proj.load_state_dict(state_dict["image_proj"], strict=True)
+            image_proj.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(self.image_proj, state_dict["image_proj"], device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_proj, updated_state_dict, device_map=device_map, dtype=self.dtype)
+
+        return image_proj
+
+    def _load_ip_adapter_weights(self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT) -> None:
+        """Sets IP-Adapter attention processors, image projection, and loads state_dict.
+
+        Args:
+            state_dict (`Dict`):
+                State dict with keys "ip_adapter", which contains parameters for attention processors, and
+                "image_proj", which contains parameters for image projection net.
+            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
+                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
+                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
+                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
+                argument to `True` will raise an error.
+        """
+
+        attn_procs = self._convert_ip_adapter_attn_to_diffusers(state_dict["ip_adapter"], low_cpu_mem_usage)
+        self.set_attn_processor(attn_procs)
+
+        self.image_proj = self._convert_ip_adapter_image_proj_to_diffusers(state_dict["image_proj"], low_cpu_mem_usage)
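Review note: the SD3 loader is reorganized to match the UNet and Flux mixins: the `_convert_*` helpers build the attention processors and the image-projection module (optionally on the meta device), and `_load_ip_adapter_weights` only wires them together. The new key remapping adds support for InstantX-style SD3.5 checkpoints. A minimal usage sketch, assuming the pipeline-level `load_ip_adapter` entry point reaches this code:

import torch
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-large", torch_dtype=torch.bfloat16
)
# The key remapping above is what lets this checkpoint load into
# SD3IPAdapterJointAttnProcessor2_0 + IPAdapterTimeImageProjection.
pipe.load_ip_adapter("InstantX/SD3.5-Large-IP-Adapter")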
diffusers/loaders/unet.py CHANGED
@@ -21,7 +21,6 @@ import safetensors
 import torch
 import torch.nn.functional as F
 from huggingface_hub.utils import validate_hf_hub_args
-from torch import nn

 from ..models.embeddings import (
     ImageProjection,
@@ -31,7 +30,7 @@ from ..models.embeddings import (
     IPAdapterPlusImageProjection,
     MultiIPAdapterImageProjection,
 )
-from ..models.modeling_utils import load_model_dict_into_meta, load_state_dict
+from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta, load_state_dict
 from ..utils import (
     USE_PEFT_BACKEND,
     _get_model_file,
@@ -44,13 +43,11 @@ from ..utils import (
     is_torch_version,
     logging,
 )
+from .lora_base import _func_optionally_disable_offloading
 from .lora_pipeline import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, TEXT_ENCODER_NAME, UNET_NAME
 from .utils import AttnProcsLayers


-if is_accelerate_available():
-    from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module
-
 logger = logging.get_logger(__name__)


@@ -146,7 +143,7 @@ class UNet2DConditionLoadersMixin:
         adapter_name = kwargs.pop("adapter_name", None)
         _pipeline = kwargs.pop("_pipeline", None)
         network_alphas = kwargs.pop("network_alphas", None)
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
+        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
         allow_pickle = False

         if low_cpu_mem_usage and is_peft_version("<=", "0.13.0"):
@@ -343,6 +340,17 @@ class UNet2DConditionLoadersMixin:
             else:
                 if is_peft_version("<", "0.9.0"):
                     lora_config_kwargs.pop("use_dora")
+
+            if "lora_bias" in lora_config_kwargs:
+                if lora_config_kwargs["lora_bias"]:
+                    if is_peft_version("<=", "0.13.2"):
+                        raise ValueError(
+                            "You need `peft` 0.14.0 at least to use `bias` in LoRAs. Please upgrade your installation of `peft`."
+                        )
+                else:
+                    if is_peft_version("<=", "0.13.2"):
+                        lora_config_kwargs.pop("lora_bias")
+
             lora_config = LoraConfig(**lora_config_kwargs)

             # adapter_name
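Review note: `lora_bias` (LoRA checkpoints that also train bias terms) requires `peft` >= 0.14.0; on older `peft`, the loader errors when the bias is actually used and silently drops the key when it is `False`. A standalone sketch of the same gate, with the version strings taken from the diff:

from packaging import version
import peft
from peft import LoraConfig

lora_config_kwargs = {"r": 16, "lora_alpha": 16, "lora_bias": True}

if version.parse(peft.__version__) <= version.parse("0.13.2"):
    if lora_config_kwargs["lora_bias"]:
        raise ValueError("You need `peft` 0.14.0 at least to use `bias` in LoRAs.")
    lora_config_kwargs.pop("lora_bias")  # harmless to drop when it is False

lora_config = LoraConfig(**lora_config_kwargs)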
@@ -400,27 +408,7 @@ class UNet2DConditionLoadersMixin:
             tuple:
                 A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` is True.
         """
-        is_model_cpu_offload = False
-        is_sequential_cpu_offload = False
-
-        if _pipeline is not None and _pipeline.hf_device_map is None:
-            for _, component in _pipeline.components.items():
-                if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
-                    if not is_model_cpu_offload:
-                        is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
-                    if not is_sequential_cpu_offload:
-                        is_sequential_cpu_offload = (
-                            isinstance(component._hf_hook, AlignDevicesHook)
-                            or hasattr(component._hf_hook, "hooks")
-                            and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
-                        )
-
-                    logger.info(
-                        "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
-                    )
-                    remove_hook_from_module(component, recurse=is_sequential_cpu_offload)
-
-        return (is_model_cpu_offload, is_sequential_cpu_offload)
+        return _func_optionally_disable_offloading(_pipeline=_pipeline)

     def save_attn_procs(
         self,
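Review note: the duplicated hook-inspection logic moves to `_func_optionally_disable_offloading` in `lora_base.py`, so the UNet and pipeline LoRA paths share one implementation. For reference, a minimal sketch of what such a check looks at, using accelerate's real hook classes (`components` is a hypothetical name-to-module dict):

import torch.nn as nn
from accelerate.hooks import AlignDevicesHook, CpuOffload

def detect_offloading(components: dict) -> tuple:
    # Each offloading mode attaches a distinctive accelerate hook; inspecting
    # it tells us which mode to restore after the LoRA weights are loaded.
    is_model_cpu_offload = False
    is_sequential_cpu_offload = False
    for component in components.values():
        hook = getattr(component, "_hf_hook", None)
        if isinstance(component, nn.Module) and hook is not None:
            is_model_cpu_offload |= isinstance(hook, CpuOffload)
            is_sequential_cpu_offload |= isinstance(hook, AlignDevicesHook)
    return is_model_cpu_offload, is_sequential_cpu_offload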
@@ -552,7 +540,7 @@ class UNet2DConditionLoadersMixin:

         return state_dict

-    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if low_cpu_mem_usage:
             if is_accelerate_available():
                 from accelerate import init_empty_weights
@@ -765,11 +753,12 @@ class UNet2DConditionLoadersMixin:
         if not low_cpu_mem_usage:
             image_projection.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(image_projection, updated_state_dict, device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)

         return image_projection

-    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         from ..models.attention_processor import (
             IPAdapterAttnProcessor,
             IPAdapterAttnProcessor2_0,
@@ -858,13 +847,14 @@ class UNet2DConditionLoadersMixin:
             else:
                 device = next(iter(value_dict.values())).device
                 dtype = next(iter(value_dict.values())).dtype
-                load_model_dict_into_meta(attn_procs[name], value_dict, device=device, dtype=dtype)
+                device_map = {"": device}
+                load_model_dict_into_meta(attn_procs[name], value_dict, device_map=device_map, dtype=dtype)

             key_id += 2

         return attn_procs

-    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=False):
+    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if not isinstance(state_dicts, list):
             state_dicts = [state_dicts]

diffusers/models/__init__.py CHANGED
@@ -26,6 +26,7 @@ _import_structure = {}

 if is_torch_available():
     _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"]
+    _import_structure["auto_model"] = ["AutoModel"]
     _import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"]
     _import_structure["autoencoders.autoencoder_dc"] = ["AutoencoderDC"]
     _import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"]
@@ -33,12 +34,15 @@ if is_torch_available():
     _import_structure["autoencoders.autoencoder_kl_cogvideox"] = ["AutoencoderKLCogVideoX"]
     _import_structure["autoencoders.autoencoder_kl_hunyuan_video"] = ["AutoencoderKLHunyuanVideo"]
     _import_structure["autoencoders.autoencoder_kl_ltx"] = ["AutoencoderKLLTXVideo"]
+    _import_structure["autoencoders.autoencoder_kl_magvit"] = ["AutoencoderKLMagvit"]
     _import_structure["autoencoders.autoencoder_kl_mochi"] = ["AutoencoderKLMochi"]
     _import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"]
+    _import_structure["autoencoders.autoencoder_kl_wan"] = ["AutoencoderKLWan"]
     _import_structure["autoencoders.autoencoder_oobleck"] = ["AutoencoderOobleck"]
     _import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"]
     _import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"]
     _import_structure["autoencoders.vq_model"] = ["VQModel"]
+    _import_structure["cache_utils"] = ["CacheMixin"]
     _import_structure["controlnets.controlnet"] = ["ControlNetModel"]
     _import_structure["controlnets.controlnet_flux"] = ["FluxControlNetModel", "FluxMultiControlNetModel"]
     _import_structure["controlnets.controlnet_hunyuan"] = [
@@ -50,10 +54,12 @@ if is_torch_available():
     _import_structure["controlnets.controlnet_union"] = ["ControlNetUnionModel"]
     _import_structure["controlnets.controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"]
     _import_structure["controlnets.multicontrolnet"] = ["MultiControlNetModel"]
+    _import_structure["controlnets.multicontrolnet_union"] = ["MultiControlNetUnionModel"]
     _import_structure["embeddings"] = ["ImageProjection"]
     _import_structure["modeling_utils"] = ["ModelMixin"]
     _import_structure["transformers.auraflow_transformer_2d"] = ["AuraFlowTransformer2DModel"]
     _import_structure["transformers.cogvideox_transformer_3d"] = ["CogVideoXTransformer3DModel"]
+    _import_structure["transformers.consisid_transformer_3d"] = ["ConsisIDTransformer3DModel"]
     _import_structure["transformers.dit_transformer_2d"] = ["DiTTransformer2DModel"]
     _import_structure["transformers.dual_transformer_2d"] = ["DualTransformer2DModel"]
     _import_structure["transformers.hunyuan_transformer_2d"] = ["HunyuanDiT2DModel"]
@@ -67,12 +73,17 @@ if is_torch_available():
     _import_structure["transformers.transformer_2d"] = ["Transformer2DModel"]
     _import_structure["transformers.transformer_allegro"] = ["AllegroTransformer3DModel"]
     _import_structure["transformers.transformer_cogview3plus"] = ["CogView3PlusTransformer2DModel"]
+    _import_structure["transformers.transformer_cogview4"] = ["CogView4Transformer2DModel"]
+    _import_structure["transformers.transformer_easyanimate"] = ["EasyAnimateTransformer3DModel"]
     _import_structure["transformers.transformer_flux"] = ["FluxTransformer2DModel"]
     _import_structure["transformers.transformer_hunyuan_video"] = ["HunyuanVideoTransformer3DModel"]
     _import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"]
+    _import_structure["transformers.transformer_lumina2"] = ["Lumina2Transformer2DModel"]
     _import_structure["transformers.transformer_mochi"] = ["MochiTransformer3DModel"]
+    _import_structure["transformers.transformer_omnigen"] = ["OmniGenTransformer2DModel"]
     _import_structure["transformers.transformer_sd3"] = ["SD3Transformer2DModel"]
     _import_structure["transformers.transformer_temporal"] = ["TransformerTemporalModel"]
+    _import_structure["transformers.transformer_wan"] = ["WanTransformer3DModel"]
     _import_structure["unets.unet_1d"] = ["UNet1DModel"]
     _import_structure["unets.unet_2d"] = ["UNet2DModel"]
     _import_structure["unets.unet_2d_condition"] = ["UNet2DConditionModel"]
@@ -93,6 +104,7 @@ if is_flax_available():
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     if is_torch_available():
         from .adapter import MultiAdapter, T2IAdapter
+        from .auto_model import AutoModel
         from .autoencoders import (
             AsymmetricAutoencoderKL,
             AutoencoderDC,
@@ -101,13 +113,16 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             AutoencoderKLCogVideoX,
             AutoencoderKLHunyuanVideo,
             AutoencoderKLLTXVideo,
+            AutoencoderKLMagvit,
             AutoencoderKLMochi,
             AutoencoderKLTemporalDecoder,
+            AutoencoderKLWan,
             AutoencoderOobleck,
             AutoencoderTiny,
             ConsistencyDecoderVAE,
             VQModel,
         )
+        from .cache_utils import CacheMixin
         from .controlnets import (
             ControlNetModel,
             ControlNetUnionModel,
@@ -117,6 +132,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             HunyuanDiT2DControlNetModel,
             HunyuanDiT2DMultiControlNetModel,
             MultiControlNetModel,
+            MultiControlNetUnionModel,
             SD3ControlNetModel,
             SD3MultiControlNetModel,
             SparseControlNetModel,
@@ -129,15 +145,20 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             AuraFlowTransformer2DModel,
             CogVideoXTransformer3DModel,
             CogView3PlusTransformer2DModel,
+            CogView4Transformer2DModel,
+            ConsisIDTransformer3DModel,
             DiTTransformer2DModel,
             DualTransformer2DModel,
+            EasyAnimateTransformer3DModel,
             FluxTransformer2DModel,
             HunyuanDiT2DModel,
             HunyuanVideoTransformer3DModel,
             LatteTransformer3DModel,
             LTXVideoTransformer3DModel,
+            Lumina2Transformer2DModel,
             LuminaNextDiT2DModel,
             MochiTransformer3DModel,
+            OmniGenTransformer2DModel,
             PixArtTransformer2DModel,
             PriorTransformer,
             SanaTransformer2DModel,
@@ -146,6 +167,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             T5FilmDecoder,
             Transformer2DModel,
             TransformerTemporalModel,
+            WanTransformer3DModel,
         )
         from .unets import (
             I2VGenXLUNet,
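Review note: `AutoModel` is new in 0.33.0 and mirrors the `AutoModel` idea from transformers: it resolves the concrete model class from the repo's config so callers don't have to know it up front. A minimal sketch, assuming the standard `from_pretrained` arguments apply:

import torch
from diffusers import AutoModel

# Resolves to UNet2DConditionModel from the subfolder's config; no explicit class needed.
unet = AutoModel.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    subfolder="unet",
    torch_dtype=torch.float16,
)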
diffusers/models/activations.py CHANGED
@@ -24,12 +24,12 @@ from ..utils.import_utils import is_torch_npu_available, is_torch_version
 if is_torch_npu_available():
     import torch_npu

-ACTIVATION_FUNCTIONS = {
-    "swish": nn.SiLU(),
-    "silu": nn.SiLU(),
-    "mish": nn.Mish(),
-    "gelu": nn.GELU(),
-    "relu": nn.ReLU(),
+ACT2CLS = {
+    "swish": nn.SiLU,
+    "silu": nn.SiLU,
+    "mish": nn.Mish,
+    "gelu": nn.GELU,
+    "relu": nn.ReLU,
 }


@@ -44,10 +44,10 @@ def get_activation(act_fn: str) -> nn.Module:
     """

     act_fn = act_fn.lower()
-    if act_fn in ACTIVATION_FUNCTIONS:
-        return ACTIVATION_FUNCTIONS[act_fn]
+    if act_fn in ACT2CLS:
+        return ACT2CLS[act_fn]()
     else:
-        raise ValueError(f"Unsupported activation function: {act_fn}")
+        raise ValueError(f"activation function {act_fn} not found in ACT2FN mapping {list(ACT2CLS.keys())}")


 class FP32SiLU(nn.Module):
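Review note: `ACTIVATION_FUNCTIONS` held shared `nn.Module` instances, so every model that asked for "silu" received the same object; `ACT2CLS` maps names to classes and instantiates on lookup, giving each caller its own module. A quick check of the new behavior (import path taken from the diff):

from diffusers.models.activations import get_activation

act_a = get_activation("silu")
act_b = get_activation("silu")
# Each call now constructs a fresh nn.SiLU instead of returning a shared singleton.
assert act_a is not act_b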
diffusers/models/attention.py CHANGED
@@ -612,7 +612,6 @@ class LuminaFeedForward(nn.Module):
         ffn_dim_multiplier: Optional[float] = None,
     ):
         super().__init__()
-        inner_dim = int(2 * inner_dim / 3)
         # custom hidden_size factor multiplier
         if ffn_dim_multiplier is not None:
             inner_dim = int(ffn_dim_multiplier * inner_dim)
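Review note: `LuminaFeedForward` no longer applies the SwiGLU-style 2/3 width reduction internally, so callers must now pass the final `inner_dim` themselves. A short sketch of the sizing convention this moves to the call site (the concrete `dim` is illustrative, not taken from the diff):

# SwiGLU feed-forwards conventionally shrink the 4x MLP width by 2/3 so the
# gated variant keeps roughly the parameter count of a plain two-layer MLP.
dim = 2304
inner_dim = int(2 * (4 * dim) / 3)  # now computed by the caller, not the module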