diffusers 0.32.2__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
diffusers/loaders/textual_inversion.py CHANGED
@@ -40,7 +40,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
     force_download = kwargs.pop("force_download", False)
     proxies = kwargs.pop("proxies", None)
     local_files_only = kwargs.pop("local_files_only", None)
-    token = kwargs.pop("token", None)
+    hf_token = kwargs.pop("hf_token", None)
     revision = kwargs.pop("revision", None)
     subfolder = kwargs.pop("subfolder", None)
     weight_name = kwargs.pop("weight_name", None)
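Note: `load_textual_inversion` already uses a `token` argument for the placeholder token string of the embedding, so the Hub authentication kwarg is renamed to `hf_token` to avoid the collision. A minimal sketch of the new calling convention (the token value is illustrative):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")
    # `token` still names the placeholder token; `hf_token` now carries Hub auth
    pipe.load_textual_inversion(
        "sd-concepts-library/cat-toy", hf_token="hf_..."  # hypothetical token value
    )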
@@ -73,7 +73,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
                         force_download=force_download,
                         proxies=proxies,
                         local_files_only=local_files_only,
-                        token=token,
+                        token=hf_token,
                         revision=revision,
                         subfolder=subfolder,
                         user_agent=user_agent,
@@ -93,7 +93,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
                     force_download=force_download,
                     proxies=proxies,
                     local_files_only=local_files_only,
-                    token=token,
+                    token=hf_token,
                     revision=revision,
                     subfolder=subfolder,
                     user_agent=user_agent,
@@ -312,7 +312,7 @@ class TextualInversionLoaderMixin:
            local_files_only (`bool`, *optional*, defaults to `False`):
                Whether to only load local model weights and configuration files or not. If set to `True`, the model
                won't be downloaded from the Hub.
-           token (`str` or *bool*, *optional*):
+           hf_token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
                `diffusers-cli login` (stored in `~/.huggingface`) is used.
            revision (`str`, *optional*, defaults to `"main"`):
@@ -333,7 +333,7 @@ class TextualInversionLoaderMixin:
        from diffusers import StableDiffusionPipeline
        import torch

-       model_id = "runwayml/stable-diffusion-v1-5"
+       model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
        pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

        pipe.load_textual_inversion("sd-concepts-library/cat-toy")
@@ -352,7 +352,7 @@ class TextualInversionLoaderMixin:
        from diffusers import StableDiffusionPipeline
        import torch

-       model_id = "runwayml/stable-diffusion-v1-5"
+       model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
        pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

        pipe.load_textual_inversion("./charturnerv2.pt", token="charturnerv2")
@@ -449,9 +449,9 @@

        # 7.5 Offload the model again
        if is_model_cpu_offload:
-           self.enable_model_cpu_offload()
+           self.enable_model_cpu_offload(device=device)
        elif is_sequential_cpu_offload:
-           self.enable_sequential_cpu_offload()
+           self.enable_sequential_cpu_offload(device=device)

        # / Unsafe Code >

@@ -469,7 +469,7 @@
        from diffusers import AutoPipelineForText2Image
        import torch

-       pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+       pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")

        # Example 1
        pipeline.load_textual_inversion("sd-concepts-library/gta5-artwork")
diffusers/loaders/transformer_flux.py CHANGED
@@ -17,7 +17,7 @@ from ..models.embeddings import (
     ImageProjection,
     MultiIPAdapterImageProjection,
 )
-from ..models.modeling_utils import load_model_dict_into_meta
+from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
 from ..utils import (
     is_accelerate_available,
     is_torch_version,
@@ -36,7 +36,7 @@ class FluxTransformer2DLoadersMixin:
     Load layers into a [`FluxTransformer2DModel`].
     """

-    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if low_cpu_mem_usage:
             if is_accelerate_available():
                 from accelerate import init_empty_weights
@@ -82,11 +82,12 @@
         if not low_cpu_mem_usage:
             image_projection.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(image_projection, updated_state_dict, device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)

         return image_projection

-    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         from ..models.attention_processor import (
             FluxIPAdapterJointAttnProcessor2_0,
         )
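Throughout these loaders, `load_model_dict_into_meta` switches from a single `device` argument to an accelerate-style `device_map`; the `{"": device}` form maps the root module, and therefore every parameter, to one device. A toy illustration of the underlying meta-init/materialize pattern (not the internal helper itself), assuming `accelerate` is installed:

    import torch
    from accelerate import init_empty_weights
    from accelerate.utils import set_module_tensor_to_device

    with init_empty_weights():
        proj = torch.nn.Linear(8, 8)  # toy module, allocated on the meta device

    device_map = {"": torch.device("cpu")}  # "" = place the whole module on one device
    state_dict = {"weight": torch.randn(8, 8), "bias": torch.zeros(8)}
    for name, tensor in state_dict.items():
        set_module_tensor_to_device(proj, name, device_map[""], value=tensor)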
@@ -151,15 +152,15 @@
             if not low_cpu_mem_usage:
                 attn_procs[name].load_state_dict(value_dict)
             else:
-                device = self.device
+                device_map = {"": self.device}
                 dtype = self.dtype
-                load_model_dict_into_meta(attn_procs[name], value_dict, device=device, dtype=dtype)
+                load_model_dict_into_meta(attn_procs[name], value_dict, device_map=device_map, dtype=dtype)

             key_id += 1

         return attn_procs

-    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=False):
+    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if not isinstance(state_dicts, list):
             state_dicts = [state_dicts]

@@ -177,5 +178,3 @@

         self.encoder_hid_proj = MultiIPAdapterImageProjection(image_projection_layers)
         self.config.encoder_hid_dim_type = "ip_image_proj"
-
-        self.to(dtype=self.dtype, device=self.device)
diffusers/loaders/transformer_sd3.py CHANGED
@@ -11,79 +11,160 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from contextlib import nullcontext
 from typing import Dict

 from ..models.attention_processor import SD3IPAdapterJointAttnProcessor2_0
 from ..models.embeddings import IPAdapterTimeImageProjection
 from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
+from ..utils import is_accelerate_available, is_torch_version, logging
+
+
+logger = logging.get_logger(__name__)


 class SD3Transformer2DLoadersMixin:
     """Load IP-Adapters and LoRA layers into a `[SD3Transformer2DModel]`."""

-    def _load_ip_adapter_weights(self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT) -> None:
-        """Sets IP-Adapter attention processors, image projection, and loads state_dict.
+    def _convert_ip_adapter_attn_to_diffusers(
+        self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT
+    ) -> Dict:
+        if low_cpu_mem_usage:
+            if is_accelerate_available():
+                from accelerate import init_empty_weights
+
+            else:
+                low_cpu_mem_usage = False
+                logger.warning(
+                    "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the"
+                    " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install"
+                    " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip"
+                    " install accelerate\n```\n."
+                )
+
+        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError(
+                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
+                " `low_cpu_mem_usage=False`."
+            )

-        Args:
-            state_dict (`Dict`):
-                State dict with keys "ip_adapter", which contains parameters for attention processors, and
-                "image_proj", which contains parameters for image projection net.
-            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-                argument to `True` will raise an error.
-        """
         # IP-Adapter cross attention parameters
         hidden_size = self.config.attention_head_dim * self.config.num_attention_heads
         ip_hidden_states_dim = self.config.attention_head_dim * self.config.num_attention_heads
-        timesteps_emb_dim = state_dict["ip_adapter"]["0.norm_ip.linear.weight"].shape[1]
+        timesteps_emb_dim = state_dict["0.norm_ip.linear.weight"].shape[1]

         # Dict where key is transformer layer index, value is attention processor's state dict
         # ip_adapter state dict keys example: "0.norm_ip.linear.weight"
         layer_state_dict = {idx: {} for idx in range(len(self.attn_processors))}
-        for key, weights in state_dict["ip_adapter"].items():
+        for key, weights in state_dict.items():
             idx, name = key.split(".", maxsplit=1)
             layer_state_dict[int(idx)][name] = weights

-        # Create IP-Adapter attention processor
+        # Create IP-Adapter attention processor & load state_dict
         attn_procs = {}
+        init_context = init_empty_weights if low_cpu_mem_usage else nullcontext
         for idx, name in enumerate(self.attn_processors.keys()):
-            attn_procs[name] = SD3IPAdapterJointAttnProcessor2_0(
-                hidden_size=hidden_size,
-                ip_hidden_states_dim=ip_hidden_states_dim,
-                head_dim=self.config.attention_head_dim,
-                timesteps_emb_dim=timesteps_emb_dim,
-            ).to(self.device, dtype=self.dtype)
+            with init_context():
+                attn_procs[name] = SD3IPAdapterJointAttnProcessor2_0(
+                    hidden_size=hidden_size,
+                    ip_hidden_states_dim=ip_hidden_states_dim,
+                    head_dim=self.config.attention_head_dim,
+                    timesteps_emb_dim=timesteps_emb_dim,
+                )

             if not low_cpu_mem_usage:
                 attn_procs[name].load_state_dict(layer_state_dict[idx], strict=True)
             else:
+                device_map = {"": self.device}
                 load_model_dict_into_meta(
-                    attn_procs[name], layer_state_dict[idx], device=self.device, dtype=self.dtype
+                    attn_procs[name], layer_state_dict[idx], device_map=device_map, dtype=self.dtype
                 )

-        self.set_attn_processor(attn_procs)
+        return attn_procs
+
+    def _convert_ip_adapter_image_proj_to_diffusers(
+        self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT
+    ) -> IPAdapterTimeImageProjection:
+        if low_cpu_mem_usage:
+            if is_accelerate_available():
+                from accelerate import init_empty_weights
+
+            else:
+                low_cpu_mem_usage = False
+                logger.warning(
+                    "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the"
+                    " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install"
+                    " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip"
+                    " install accelerate\n```\n."
+                )
+
+        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError(
+                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
+                " `low_cpu_mem_usage=False`."
+            )
+
+        init_context = init_empty_weights if low_cpu_mem_usage else nullcontext
+
+        # Convert to diffusers
+        updated_state_dict = {}
+        for key, value in state_dict.items():
+            # InstantX/SD3.5-Large-IP-Adapter
+            if key.startswith("layers."):
+                idx = key.split(".")[1]
+                key = key.replace(f"layers.{idx}.0.norm1", f"layers.{idx}.ln0")
+                key = key.replace(f"layers.{idx}.0.norm2", f"layers.{idx}.ln1")
+                key = key.replace(f"layers.{idx}.0.to_q", f"layers.{idx}.attn.to_q")
+                key = key.replace(f"layers.{idx}.0.to_kv", f"layers.{idx}.attn.to_kv")
+                key = key.replace(f"layers.{idx}.0.to_out", f"layers.{idx}.attn.to_out.0")
+                key = key.replace(f"layers.{idx}.1.0", f"layers.{idx}.adaln_norm")
+                key = key.replace(f"layers.{idx}.1.1", f"layers.{idx}.ff.net.0.proj")
+                key = key.replace(f"layers.{idx}.1.3", f"layers.{idx}.ff.net.2")
+                key = key.replace(f"layers.{idx}.2.1", f"layers.{idx}.adaln_proj")
+            updated_state_dict[key] = value

         # Image projetion parameters
-        embed_dim = state_dict["image_proj"]["proj_in.weight"].shape[1]
-        output_dim = state_dict["image_proj"]["proj_out.weight"].shape[0]
-        hidden_dim = state_dict["image_proj"]["proj_in.weight"].shape[0]
-        heads = state_dict["image_proj"]["layers.0.attn.to_q.weight"].shape[0] // 64
-        num_queries = state_dict["image_proj"]["latents"].shape[1]
-        timestep_in_dim = state_dict["image_proj"]["time_embedding.linear_1.weight"].shape[1]
+        embed_dim = updated_state_dict["proj_in.weight"].shape[1]
+        output_dim = updated_state_dict["proj_out.weight"].shape[0]
+        hidden_dim = updated_state_dict["proj_in.weight"].shape[0]
+        heads = updated_state_dict["layers.0.attn.to_q.weight"].shape[0] // 64
+        num_queries = updated_state_dict["latents"].shape[1]
+        timestep_in_dim = updated_state_dict["time_embedding.linear_1.weight"].shape[1]

         # Image projection
-        self.image_proj = IPAdapterTimeImageProjection(
-            embed_dim=embed_dim,
-            output_dim=output_dim,
-            hidden_dim=hidden_dim,
-            heads=heads,
-            num_queries=num_queries,
-            timestep_in_dim=timestep_in_dim,
-        ).to(device=self.device, dtype=self.dtype)
+        with init_context():
+            image_proj = IPAdapterTimeImageProjection(
+                embed_dim=embed_dim,
+                output_dim=output_dim,
+                hidden_dim=hidden_dim,
+                heads=heads,
+                num_queries=num_queries,
+                timestep_in_dim=timestep_in_dim,
+            )

         if not low_cpu_mem_usage:
-            self.image_proj.load_state_dict(state_dict["image_proj"], strict=True)
+            image_proj.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(self.image_proj, state_dict["image_proj"], device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_proj, updated_state_dict, device_map=device_map, dtype=self.dtype)
+
+        return image_proj
+
+    def _load_ip_adapter_weights(self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT) -> None:
+        """Sets IP-Adapter attention processors, image projection, and loads state_dict.
+
+        Args:
+            state_dict (`Dict`):
+                State dict with keys "ip_adapter", which contains parameters for attention processors, and
+                "image_proj", which contains parameters for image projection net.
+            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
+                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
+                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
+                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
+                argument to `True` will raise an error.
+        """
+
+        attn_procs = self._convert_ip_adapter_attn_to_diffusers(state_dict["ip_adapter"], low_cpu_mem_usage)
+        self.set_attn_processor(attn_procs)
+
+        self.image_proj = self._convert_ip_adapter_image_proj_to_diffusers(state_dict["image_proj"], low_cpu_mem_usage)
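With this refactor, `_load_ip_adapter_weights` becomes a thin wrapper over the two converters, matching the structure of the Flux and UNet loaders; the public entry point remains the pipeline-level `load_ip_adapter`. A minimal sketch, assuming the InstantX checkpoint layout named in the key-remapping comment above (real use also needs an `ip_adapter_image` and the adapter's image encoder at inference time):

    import torch
    from diffusers import StableDiffusion3Pipeline

    pipe = StableDiffusion3Pipeline.from_pretrained(
        "stabilityai/stable-diffusion-3.5-large", torch_dtype=torch.bfloat16
    )
    # routes state_dict["ip_adapter"] and state_dict["image_proj"] through the
    # converter methods introduced above
    pipe.load_ip_adapter("InstantX/SD3.5-Large-IP-Adapter")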
diffusers/loaders/unet.py CHANGED
@@ -30,7 +30,7 @@ from ..models.embeddings import (
     IPAdapterPlusImageProjection,
     MultiIPAdapterImageProjection,
 )
-from ..models.modeling_utils import load_model_dict_into_meta, load_state_dict
+from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta, load_state_dict
 from ..utils import (
     USE_PEFT_BACKEND,
     _get_model_file,
@@ -143,7 +143,7 @@
         adapter_name = kwargs.pop("adapter_name", None)
         _pipeline = kwargs.pop("_pipeline", None)
         network_alphas = kwargs.pop("network_alphas", None)
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
+        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
         allow_pickle = False

         if low_cpu_mem_usage and is_peft_version("<=", "0.13.0"):
@@ -340,6 +340,17 @@
             else:
                 if is_peft_version("<", "0.9.0"):
                     lora_config_kwargs.pop("use_dora")
+
+            if "lora_bias" in lora_config_kwargs:
+                if lora_config_kwargs["lora_bias"]:
+                    if is_peft_version("<=", "0.13.2"):
+                        raise ValueError(
+                            "You need `peft` 0.14.0 at least to use `bias` in LoRAs. Please upgrade your installation of `peft`."
+                        )
+                else:
+                    if is_peft_version("<=", "0.13.2"):
+                        lora_config_kwargs.pop("lora_bias")
+
             lora_config = LoraConfig(**lora_config_kwargs)

             # adapter_name
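The new `lora_bias` gate means LoRA checkpoints whose adapter config trains bias terms (`lora_bias=True`) can only be loaded with `peft` 0.14.0 or newer; on older `peft` the key is dropped when it is `False` and rejected when it is `True`. A hedged sketch of the user-facing effect (the LoRA repo id is hypothetical):

    import torch
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    )
    # raises the ValueError above if the checkpoint sets lora_bias=True and peft <= 0.13.2
    pipe.load_lora_weights("user/sdxl-lora-with-bias")  # hypothetical repo id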
@@ -529,7 +540,7 @@

         return state_dict

-    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if low_cpu_mem_usage:
             if is_accelerate_available():
                 from accelerate import init_empty_weights
@@ -742,11 +753,12 @@
         if not low_cpu_mem_usage:
             image_projection.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(image_projection, updated_state_dict, device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)

         return image_projection

-    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         from ..models.attention_processor import (
             IPAdapterAttnProcessor,
             IPAdapterAttnProcessor2_0,
@@ -835,13 +847,14 @@
             else:
                 device = next(iter(value_dict.values())).device
                 dtype = next(iter(value_dict.values())).dtype
-                load_model_dict_into_meta(attn_procs[name], value_dict, device=device, dtype=dtype)
+                device_map = {"": device}
+                load_model_dict_into_meta(attn_procs[name], value_dict, device_map=device_map, dtype=dtype)

             key_id += 2

         return attn_procs

-    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=False):
+    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if not isinstance(state_dicts, list):
             state_dicts = [state_dicts]

diffusers/models/__init__.py CHANGED
@@ -26,6 +26,7 @@ _import_structure = {}

 if is_torch_available():
     _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"]
+    _import_structure["auto_model"] = ["AutoModel"]
     _import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"]
     _import_structure["autoencoders.autoencoder_dc"] = ["AutoencoderDC"]
     _import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"]
@@ -33,12 +34,15 @@ if is_torch_available():
     _import_structure["autoencoders.autoencoder_kl_cogvideox"] = ["AutoencoderKLCogVideoX"]
     _import_structure["autoencoders.autoencoder_kl_hunyuan_video"] = ["AutoencoderKLHunyuanVideo"]
     _import_structure["autoencoders.autoencoder_kl_ltx"] = ["AutoencoderKLLTXVideo"]
+    _import_structure["autoencoders.autoencoder_kl_magvit"] = ["AutoencoderKLMagvit"]
     _import_structure["autoencoders.autoencoder_kl_mochi"] = ["AutoencoderKLMochi"]
     _import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"]
+    _import_structure["autoencoders.autoencoder_kl_wan"] = ["AutoencoderKLWan"]
     _import_structure["autoencoders.autoencoder_oobleck"] = ["AutoencoderOobleck"]
     _import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"]
     _import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"]
     _import_structure["autoencoders.vq_model"] = ["VQModel"]
+    _import_structure["cache_utils"] = ["CacheMixin"]
     _import_structure["controlnets.controlnet"] = ["ControlNetModel"]
     _import_structure["controlnets.controlnet_flux"] = ["FluxControlNetModel", "FluxMultiControlNetModel"]
     _import_structure["controlnets.controlnet_hunyuan"] = [
@@ -50,10 +54,12 @@ if is_torch_available():
     _import_structure["controlnets.controlnet_union"] = ["ControlNetUnionModel"]
     _import_structure["controlnets.controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"]
     _import_structure["controlnets.multicontrolnet"] = ["MultiControlNetModel"]
+    _import_structure["controlnets.multicontrolnet_union"] = ["MultiControlNetUnionModel"]
     _import_structure["embeddings"] = ["ImageProjection"]
     _import_structure["modeling_utils"] = ["ModelMixin"]
     _import_structure["transformers.auraflow_transformer_2d"] = ["AuraFlowTransformer2DModel"]
     _import_structure["transformers.cogvideox_transformer_3d"] = ["CogVideoXTransformer3DModel"]
+    _import_structure["transformers.consisid_transformer_3d"] = ["ConsisIDTransformer3DModel"]
     _import_structure["transformers.dit_transformer_2d"] = ["DiTTransformer2DModel"]
     _import_structure["transformers.dual_transformer_2d"] = ["DualTransformer2DModel"]
     _import_structure["transformers.hunyuan_transformer_2d"] = ["HunyuanDiT2DModel"]
@@ -67,12 +73,17 @@ if is_torch_available():
     _import_structure["transformers.transformer_2d"] = ["Transformer2DModel"]
     _import_structure["transformers.transformer_allegro"] = ["AllegroTransformer3DModel"]
     _import_structure["transformers.transformer_cogview3plus"] = ["CogView3PlusTransformer2DModel"]
+    _import_structure["transformers.transformer_cogview4"] = ["CogView4Transformer2DModel"]
+    _import_structure["transformers.transformer_easyanimate"] = ["EasyAnimateTransformer3DModel"]
     _import_structure["transformers.transformer_flux"] = ["FluxTransformer2DModel"]
     _import_structure["transformers.transformer_hunyuan_video"] = ["HunyuanVideoTransformer3DModel"]
     _import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"]
+    _import_structure["transformers.transformer_lumina2"] = ["Lumina2Transformer2DModel"]
     _import_structure["transformers.transformer_mochi"] = ["MochiTransformer3DModel"]
+    _import_structure["transformers.transformer_omnigen"] = ["OmniGenTransformer2DModel"]
     _import_structure["transformers.transformer_sd3"] = ["SD3Transformer2DModel"]
     _import_structure["transformers.transformer_temporal"] = ["TransformerTemporalModel"]
+    _import_structure["transformers.transformer_wan"] = ["WanTransformer3DModel"]
     _import_structure["unets.unet_1d"] = ["UNet1DModel"]
     _import_structure["unets.unet_2d"] = ["UNet2DModel"]
     _import_structure["unets.unet_2d_condition"] = ["UNet2DConditionModel"]
@@ -93,6 +104,7 @@ if is_flax_available():
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     if is_torch_available():
         from .adapter import MultiAdapter, T2IAdapter
+        from .auto_model import AutoModel
         from .autoencoders import (
             AsymmetricAutoencoderKL,
             AutoencoderDC,
@@ -101,13 +113,16 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             AutoencoderKLCogVideoX,
             AutoencoderKLHunyuanVideo,
             AutoencoderKLLTXVideo,
+            AutoencoderKLMagvit,
             AutoencoderKLMochi,
             AutoencoderKLTemporalDecoder,
+            AutoencoderKLWan,
             AutoencoderOobleck,
             AutoencoderTiny,
             ConsistencyDecoderVAE,
             VQModel,
         )
+        from .cache_utils import CacheMixin
         from .controlnets import (
             ControlNetModel,
             ControlNetUnionModel,
@@ -117,6 +132,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             HunyuanDiT2DControlNetModel,
             HunyuanDiT2DMultiControlNetModel,
             MultiControlNetModel,
+            MultiControlNetUnionModel,
             SD3ControlNetModel,
             SD3MultiControlNetModel,
             SparseControlNetModel,
@@ -129,15 +145,20 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             AuraFlowTransformer2DModel,
             CogVideoXTransformer3DModel,
             CogView3PlusTransformer2DModel,
+            CogView4Transformer2DModel,
+            ConsisIDTransformer3DModel,
             DiTTransformer2DModel,
             DualTransformer2DModel,
+            EasyAnimateTransformer3DModel,
             FluxTransformer2DModel,
             HunyuanDiT2DModel,
             HunyuanVideoTransformer3DModel,
             LatteTransformer3DModel,
             LTXVideoTransformer3DModel,
+            Lumina2Transformer2DModel,
             LuminaNextDiT2DModel,
             MochiTransformer3DModel,
+            OmniGenTransformer2DModel,
             PixArtTransformer2DModel,
             PriorTransformer,
             SanaTransformer2DModel,
@@ -146,6 +167,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             T5FilmDecoder,
             Transformer2DModel,
             TransformerTemporalModel,
+            WanTransformer3DModel,
         )
         from .unets import (
             I2VGenXLUNet,
diffusers/models/activations.py CHANGED
@@ -24,12 +24,12 @@ from ..utils.import_utils import is_torch_npu_available, is_torch_version
 if is_torch_npu_available():
     import torch_npu

-ACTIVATION_FUNCTIONS = {
-    "swish": nn.SiLU(),
-    "silu": nn.SiLU(),
-    "mish": nn.Mish(),
-    "gelu": nn.GELU(),
-    "relu": nn.ReLU(),
+ACT2CLS = {
+    "swish": nn.SiLU,
+    "silu": nn.SiLU,
+    "mish": nn.Mish,
+    "gelu": nn.GELU,
+    "relu": nn.ReLU,
 }

@@ -44,10 +44,10 @@ def get_activation(act_fn: str) -> nn.Module:
     """

     act_fn = act_fn.lower()
-    if act_fn in ACTIVATION_FUNCTIONS:
-        return ACTIVATION_FUNCTIONS[act_fn]
+    if act_fn in ACT2CLS:
+        return ACT2CLS[act_fn]()
     else:
-        raise ValueError(f"Unsupported activation function: {act_fn}")
+        raise ValueError(f"activation function {act_fn} not found in ACT2FN mapping {list(ACT2CLS.keys())}")


 class FP32SiLU(nn.Module):
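The switch from a dict of shared module instances to a dict of classes means every `get_activation` call now constructs a fresh `nn.Module`, so two layers no longer alias the same activation object. A quick check:

    from diffusers.models.activations import get_activation

    a = get_activation("silu")
    b = get_activation("silu")
    assert a is not b  # previously both calls returned the same shared nn.SiLU() instance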
@@ -612,7 +612,6 @@ class LuminaFeedForward(nn.Module):
         ffn_dim_multiplier: Optional[float] = None,
     ):
         super().__init__()
-        inner_dim = int(2 * inner_dim / 3)
         # custom hidden_size factor multiplier
         if ffn_dim_multiplier is not None:
             inner_dim = int(ffn_dim_multiplier * inner_dim)