diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +198 -28
  13. diffusers/loaders/lora_conversion_utils.py +679 -44
  14. diffusers/loaders/lora_pipeline.py +1963 -801
  15. diffusers/loaders/peft.py +169 -84
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +653 -75
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +22 -32
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +10 -2
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +14 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.1.dist-info/RECORD +0 -550
  387. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,21 @@
1
+ from dataclasses import dataclass
2
+ from typing import List, Union
3
+
4
+ import numpy as np
5
+ import PIL.Image
6
+
7
+ from ...utils import BaseOutput
8
+
9
+
10
+ @dataclass
11
+ class CogView4PipelineOutput(BaseOutput):
12
+ """
13
+ Output class for CogView4 pipelines.
14
+
15
+ Args:
16
+ images (`List[PIL.Image.Image]` or `np.ndarray`)
17
+ List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
18
+ num_channels)`. PIL images or numpy array represent the denoised images of the diffusion pipeline.
19
+ """
20
+
21
+ images: Union[List[PIL.Image.Image], np.ndarray]
@@ -0,0 +1,49 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ...utils import (
4
+ DIFFUSERS_SLOW_IMPORT,
5
+ OptionalDependencyNotAvailable,
6
+ _LazyModule,
7
+ get_objects_from_module,
8
+ is_opencv_available,
9
+ is_torch_available,
10
+ is_transformers_available,
11
+ )
12
+
13
+
14
+ _dummy_objects = {}
15
+ _import_structure = {}
16
+
17
+
18
+ try:
19
+ if not (is_transformers_available() and is_torch_available() and is_opencv_available()):
20
+ raise OptionalDependencyNotAvailable()
21
+ except OptionalDependencyNotAvailable:
22
+ from ...utils import dummy_torch_and_transformers_and_opencv_objects # noqa F403
23
+
24
+ _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_opencv_objects))
25
+ else:
26
+ _import_structure["pipeline_consisid"] = ["ConsisIDPipeline"]
27
+
28
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
29
+ try:
30
+ if not (is_transformers_available() and is_torch_available()):
31
+ raise OptionalDependencyNotAvailable()
32
+
33
+ except OptionalDependencyNotAvailable:
34
+ from ...utils.dummy_torch_and_transformers_objects import *
35
+ else:
36
+ from .pipeline_consisid import ConsisIDPipeline
37
+
38
+ else:
39
+ import sys
40
+
41
+ sys.modules[__name__] = _LazyModule(
42
+ __name__,
43
+ globals()["__file__"],
44
+ _import_structure,
45
+ module_spec=__spec__,
46
+ )
47
+
48
+ for name, value in _dummy_objects.items():
49
+ setattr(sys.modules[__name__], name, value)
@@ -0,0 +1,357 @@
1
+ import importlib.util
2
+ import os
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+ from PIL import Image, ImageOps
8
+ from torchvision.transforms import InterpolationMode
9
+ from torchvision.transforms.functional import normalize, resize
10
+
11
+ from ...utils import get_logger, load_image
12
+
13
+
14
logger = get_logger(__name__)

# Probe for the optional third-party packages up front, without importing them.
_insightface_available = importlib.util.find_spec("insightface") is not None
_consisid_eva_clip_available = importlib.util.find_spec("consisid_eva_clip") is not None
_facexlib_available = importlib.util.find_spec("facexlib") is not None

# Each package is mandatory for this module: fail at import time with an
# installation hint as soon as one of them is missing, otherwise import it.
if not _insightface_available:
    raise ImportError("insightface is not available. Please install it using 'pip install insightface'.")
import insightface
from insightface.app import FaceAnalysis

if not _consisid_eva_clip_available:
    raise ImportError("consisid_eva_clip is not available. Please install it using 'pip install consisid_eva_clip'.")
from consisid_eva_clip import create_model_and_transforms
from consisid_eva_clip.constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD

if not _facexlib_available:
    raise ImportError("facexlib is not available. Please install it using 'pip install facexlib'.")
from facexlib.parsing import init_parsing_model
from facexlib.utils.face_restoration_helper import FaceRestoreHelper
37
+
38
+
39
def resize_numpy_image_long(image, resize_long_edge=768):
    """
    Downscale an image so that its longer side does not exceed `resize_long_edge`, keeping the aspect ratio.

    Args:
        image (numpy.ndarray): Input image; the first two axes are read as height and width.
        resize_long_edge (int): Upper bound for the longer side. Default is 768.

    Returns:
        numpy.ndarray: `image` itself (the same object) when its longer side already fits; otherwise a
        Lanczos-resampled image whose sides are scaled by `resize_long_edge / max(height, width)` and truncated to
        integers.
    """

    height, width = image.shape[:2]
    long_edge = max(height, width)
    if long_edge > resize_long_edge:
        scale = resize_long_edge / long_edge
        # cv2.resize expects the target size as (width, height).
        target_size = (int(width * scale), int(height * scale))
        image = cv2.resize(image, target_size, interpolation=cv2.INTER_LANCZOS4)
    return image
60
+
61
+
62
def img2tensor(imgs, bgr2rgb=True, float32=True):
    """Convert one H x W x C numpy image, or a list of them, to C x H x W torch tensors.

    Args:
        imgs (list[ndarray] | ndarray): Input image(s); each must have a third (channel) axis.
        bgr2rgb (bool): When True, 3-channel images are converted from BGR to RGB channel order.
        float32 (bool): When True, the resulting tensor is cast to float32.

    Returns:
        list[tensor] | tensor: A list of tensors when `imgs` is a list (whatever its length), otherwise a single
            tensor.
    """

    def _convert(array):
        if bgr2rgb and array.shape[2] == 3:
            # float64 input is cast down to float32 before the colour conversion.
            if array.dtype == "float64":
                array = array.astype("float32")
            array = cv2.cvtColor(array, cv2.COLOR_BGR2RGB)
        # H x W x C -> C x H x W
        tensor = torch.from_numpy(array.transpose(2, 0, 1))
        return tensor.float() if float32 else tensor

    if not isinstance(imgs, list):
        return _convert(imgs)
    return [_convert(item) for item in imgs]
88
+
89
+
90
def to_gray(img):
    """
    Turn a batch of RGB images into grayscale using the 0.299 / 0.587 / 0.114 luminosity weights.

    Args:
        img (torch.Tensor): Batch of images indexed as (batch, channel, height, width); channels 0, 1 and 2 are
            read as red, green and blue.

    Returns:
        torch.Tensor: The weighted single-channel result tiled three times along the channel axis, i.e. a tensor
            of shape (batch, 3, height, width).
    """
    red, green, blue = img[:, 0:1], img[:, 1:2], img[:, 2:3]
    luminance = 0.299 * red + 0.587 * green + 0.114 * blue
    return luminance.repeat(1, 3, 1, 1)
105
+
106
+
107
def process_face_embeddings(
    face_helper_1,
    clip_vision_model,
    face_helper_2,
    eva_transform_mean,
    eva_transform_std,
    app,
    device,
    weight_dtype,
    image,
    original_id_image=None,
    is_align_face=True,
):
    """
    Process face embeddings from an image, extracting relevant features such as face embeddings, landmarks, and parsed
    face features using a series of face detection and alignment tools.

    Args:
        face_helper_1: Face helper object (first helper) used for landmark detection, alignment and face parsing.
        clip_vision_model: Vision model used for feature extraction; its `image_size` attribute sets the resize
            target.
        face_helper_2: Face helper object (second helper); its `get_feat` is the fallback embedding extractor.
        eva_transform_mean: Mean values for image normalization before passing to the vision model.
        eva_transform_std: Standard deviation values for image normalization before passing to the vision model.
        app: Face detector; `app.get(image_bgr)` must return a sequence of mappings with "bbox", "embedding" and
            "kps" entries.
        device: Device (CPU or GPU) where the computations will be performed.
        weight_dtype: Data type of the weights for precision (e.g., `torch.float32`).
        image: Input image in RGB format with pixel values in the range [0, 255].
        original_id_image: Original RGB image used as the returned face image when `is_align_face` is False; it is
            required in that case.
        is_align_face: Boolean flag indicating whether the aligned, background-masked face should be used.

    Returns:
        Tuple:
            - id_cond: Concatenated tensor of the face-recognition embedding and the vision-model embedding.
            - id_vit_hidden: Hidden states returned by the vision model.
            - return_face_features_image_2: Face image tensor (background whitened when `is_align_face` is True).
            - face_kps: Keypoints of the face detected in the image.

    Raises:
        RuntimeError: If no face is found by either detector, or if facexlib fails to produce an aligned face.
    """

    face_helper_1.clean_all()
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # get antelopev2 embedding
    face_info = app.get(image_bgr)
    if len(face_info) > 0:
        # only use the face with the largest bounding-box area
        face_info = sorted(face_info, key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]))[
            -1
        ]
        id_ante_embedding = face_info["embedding"]  # (512,)
        face_kps = face_info["kps"]
    else:
        id_ante_embedding = None
        face_kps = None

    # using facexlib to detect and align face
    face_helper_1.read_image(image_bgr)
    face_helper_1.get_face_landmarks_5(only_center_face=True)
    if face_kps is None:
        # Neither detector found a face: fail with an explicit error instead of an IndexError on the empty list.
        if len(face_helper_1.all_landmarks_5) == 0:
            raise RuntimeError("No face detected in the input image by either insightface or facexlib")
        face_kps = face_helper_1.all_landmarks_5[0]
    face_helper_1.align_warp_face()
    if len(face_helper_1.cropped_faces) == 0:
        raise RuntimeError("facexlib align face fail")
    # (512, 512, 3); presumably BGR, since it is converted with bgr2rgb=True below - TODO confirm
    align_face = face_helper_1.cropped_faces[0]

    # in case insightface didn't detect a face
    if id_ante_embedding is None:
        logger.warning("Failed to detect face using insightface. Extracting embedding with align face")
        id_ante_embedding = face_helper_2.get_feat(align_face)

    id_ante_embedding = torch.from_numpy(id_ante_embedding).to(device, weight_dtype)  # torch.Size([512])
    if id_ante_embedding.ndim == 1:
        id_ante_embedding = id_ante_embedding.unsqueeze(0)  # torch.Size([1, 512])

    # parsing
    if is_align_face:
        face_tensor = img2tensor(align_face, bgr2rgb=True).unsqueeze(0) / 255.0  # torch.Size([1, 3, 512, 512])
        face_tensor = face_tensor.to(device)
        parsing_out = face_helper_1.face_parse(normalize(face_tensor, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))[0]
        parsing_out = parsing_out.argmax(dim=1, keepdim=True)  # torch.Size([1, 1, 512, 512])
        # parsing labels that are replaced by white below
        bg_label = [0, 16, 18, 7, 8, 9, 14, 15]
        bg = sum(parsing_out == i for i in bg_label).bool()
        white_image = torch.ones_like(face_tensor)  # torch.Size([1, 3, 512, 512])
        # only keep the face features: grayscale for the vision model, colour for the returned image
        return_face_features_image = torch.where(bg, white_image, to_gray(face_tensor))
        return_face_features_image_2 = torch.where(bg, white_image, face_tensor)
    else:
        original_image_bgr = cv2.cvtColor(original_id_image, cv2.COLOR_RGB2BGR)
        face_tensor = img2tensor(original_image_bgr, bgr2rgb=True).unsqueeze(0) / 255.0
        face_tensor = face_tensor.to(device)
        return_face_features_image = return_face_features_image_2 = face_tensor

    # transform img before sending to eva-clip-vit
    face_features_image = resize(
        return_face_features_image, clip_vision_model.image_size, InterpolationMode.BICUBIC
    )  # torch.Size([1, 3, 336, 336])
    face_features_image = normalize(face_features_image, eva_transform_mean, eva_transform_std)
    id_cond_vit, id_vit_hidden = clip_vision_model(
        face_features_image.to(weight_dtype), return_all_features=False, return_hidden=True, shuffle=False
    )  # torch.Size([1, 768]), list(torch.Size([1, 577, 1024]))
    # L2-normalize the vision embedding along dim 1
    id_cond_vit_norm = torch.norm(id_cond_vit, 2, 1, True)
    id_cond_vit = torch.div(id_cond_vit, id_cond_vit_norm)

    id_cond = torch.cat(
        [id_ante_embedding, id_cond_vit], dim=-1
    )  # torch.Size([1, 512]), torch.Size([1, 768]) -> torch.Size([1, 1280])

    return (
        id_cond,
        id_vit_hidden,
        return_face_features_image_2,
        face_kps,
    )  # torch.Size([1, 1280]), list(torch.Size([1, 577, 1024]))
217
+
218
+
219
def process_face_embeddings_infer(
    face_helper_1,
    clip_vision_model,
    face_helper_2,
    eva_transform_mean,
    eva_transform_std,
    app,
    device,
    weight_dtype,
    img_file_path,
    is_align_face=True,
):
    """
    Inference entry point: load an identity image, run `process_face_embeddings` on it and return the resulting face
    image as a PIL image.

    Args:
        face_helper_1: Face helper object (first helper), forwarded to `process_face_embeddings`.
        clip_vision_model: Vision model used for feature extraction, forwarded unchanged.
        face_helper_2: Face helper object (second helper), forwarded unchanged.
        eva_transform_mean: Mean values for image normalization, forwarded unchanged.
        eva_transform_std: Standard deviation values for image normalization, forwarded unchanged.
        app: Face detector instance, forwarded unchanged.
        device: Device (CPU or GPU) where the computations will be performed.
        weight_dtype: Data type of the weights for precision (e.g., `torch.float32`).
        img_file_path: Either a string handed to `load_image`, or an array accepted by `PIL.Image.fromarray`.
        is_align_face: Boolean flag indicating whether face alignment should be performed (default: True).

    Returns:
        Tuple:
            - id_cond: Concatenated identity embedding returned by `process_face_embeddings`.
            - id_vit_hidden: Hidden states of the vision model.
            - image: The returned face tensor converted to a `PIL.Image`.
            - face_kps: Keypoints of the face detected in the image.
    """

    # Load the identity image as a PIL image.
    if isinstance(img_file_path, str):
        pil_image = load_image(image=img_file_path)
    else:
        pil_image = ImageOps.exif_transpose(Image.fromarray(img_file_path))

    # RGB array whose longer side is at most 1024 pixels (smaller images are left as they are).
    image = resize_numpy_image_long(np.array(pil_image.convert("RGB")), 1024)

    # The same array serves as the detection input and as the "original" identity image.
    id_cond, id_vit_hidden, face_tensor, face_kps = process_face_embeddings(
        face_helper_1,
        clip_vision_model,
        face_helper_2,
        eva_transform_mean,
        eva_transform_std,
        app,
        device,
        weight_dtype,
        image,
        image,
        is_align_face,
    )

    # Tensor -> H x W x C uint8 array -> PIL image.
    pixels = face_tensor.cpu().detach().squeeze().permute(1, 2, 0).numpy() * 255
    face_image = ImageOps.exif_transpose(Image.fromarray(pixels.astype(np.uint8)))

    return id_cond, id_vit_hidden, face_image, face_kps
289
+
290
+
291
def prepare_face_models(model_path, device, dtype):
    """
    Prepare all face models for the facial recognition task.

    Parameters:
    - model_path: Path to the directory containing model files; everything is read from its `face_encoder`
      sub-directory.
    - device: The device (e.g., 'cuda', 'cpu') where the torch models will be loaded.
    - dtype: Data type (e.g., torch.float32) the CLIP vision model is cast to.

    Returns (in this order):
    - face_helper_1: Face restoration helper whose `face_parse` is a "bisenet" parsing model.
    - face_helper_2: insightface recognition model loaded from `glintr100.onnx`.
    - face_clip_model: Visual tower of the EVA02-CLIP model.
    - face_main_model: insightface `FaceAnalysis` instance ("antelopev2").
    - eva_transform_mean: Per-channel mean for image normalization.
    - eva_transform_std: Per-channel standard deviation for image normalization.
    """
    face_encoder_dir = os.path.join(model_path, "face_encoder")

    # get helper model
    face_helper_1 = FaceRestoreHelper(
        upscale_factor=1,
        face_size=512,
        crop_ratio=(1, 1),
        det_model="retinaface_resnet50",
        save_ext="png",
        device=device,
        model_rootpath=face_encoder_dir,
    )
    # Clear the attribute before assigning the replacement parser (presumably so that any parser created by
    # the helper can be released first - TODO confirm).
    face_helper_1.face_parse = None
    face_helper_1.face_parse = init_parsing_model(model_name="bisenet", device=device, model_rootpath=face_encoder_dir)
    # NOTE(review): the ONNX provider and ctx_id are fixed to CUDA / 0 here, independent of `device`.
    face_helper_2 = insightface.model_zoo.get_model(
        os.path.join(face_encoder_dir, "models", "antelopev2", "glintr100.onnx"),
        providers=["CUDAExecutionProvider"],
    )
    face_helper_2.prepare(ctx_id=0)

    # get local facial extractor part 1
    model, _, _ = create_model_and_transforms(
        "EVA02-CLIP-L-14-336",
        os.path.join(face_encoder_dir, "EVA02_CLIP_L_336_psz14_s6B.pt"),
        force_custom_clip=True,
    )
    face_clip_model = model.visual
    # Fall back to the OpenAI dataset statistics when the model does not carry its own.
    eva_transform_mean = getattr(face_clip_model, "image_mean", OPENAI_DATASET_MEAN)
    eva_transform_std = getattr(face_clip_model, "image_std", OPENAI_DATASET_STD)
    # Scalars are expanded to one value per channel.
    if not isinstance(eva_transform_mean, (list, tuple)):
        eva_transform_mean = (eva_transform_mean,) * 3
    if not isinstance(eva_transform_std, (list, tuple)):
        eva_transform_std = (eva_transform_std,) * 3

    # get local facial extractor part 2
    face_main_model = FaceAnalysis(name="antelopev2", root=face_encoder_dir, providers=["CUDAExecutionProvider"])
    face_main_model.prepare(ctx_id=0, det_size=(640, 640))

    # switch the torch models to eval mode and move them to the target device
    face_helper_1.face_det.eval()
    face_helper_1.face_parse.eval()
    face_clip_model.eval()
    face_helper_1.face_det.to(device)
    face_helper_1.face_parse.to(device)
    face_clip_model.to(device, dtype=dtype)

    return face_helper_1, face_helper_2, face_clip_model, face_main_model, eva_transform_mean, eva_transform_std