diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +198 -28
  13. diffusers/loaders/lora_conversion_utils.py +679 -44
  14. diffusers/loaders/lora_pipeline.py +1963 -801
  15. diffusers/loaders/peft.py +169 -84
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +653 -75
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +22 -32
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +10 -2
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +14 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.1.dist-info/RECORD +0 -550
  387. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -18,10 +18,17 @@ from typing import Any, Callable, Dict, List, Optional, Union
18
18
  import numpy as np
19
19
  import PIL.Image
20
20
  import torch
21
- from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
21
+ from transformers import (
22
+ CLIPImageProcessor,
23
+ CLIPTextModel,
24
+ CLIPTokenizer,
25
+ CLIPVisionModelWithProjection,
26
+ T5EncoderModel,
27
+ T5TokenizerFast,
28
+ )
22
29
 
23
30
  from ...image_processor import PipelineImageInput, VaeImageProcessor
24
- from ...loaders import FluxLoraLoaderMixin, TextualInversionLoaderMixin
31
+ from ...loaders import FluxIPAdapterMixin, FluxLoraLoaderMixin, TextualInversionLoaderMixin
25
32
  from ...models.autoencoders import AutoencoderKL
26
33
  from ...models.transformers import FluxTransformer2DModel
27
34
  from ...schedulers import FlowMatchEulerDiscreteScheduler
@@ -74,7 +81,7 @@ def calculate_shift(
74
81
  base_seq_len: int = 256,
75
82
  max_seq_len: int = 4096,
76
83
  base_shift: float = 0.5,
77
- max_shift: float = 1.16,
84
+ max_shift: float = 1.15,
78
85
  ):
79
86
  m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
80
87
  b = base_shift - m * base_seq_len
@@ -156,7 +163,7 @@ def retrieve_timesteps(
156
163
  return timesteps, num_inference_steps
157
164
 
158
165
 
159
- class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
166
+ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FluxIPAdapterMixin):
160
167
  r"""
161
168
  The Flux pipeline for image inpainting.
162
169
 
@@ -183,8 +190,8 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
183
190
  [T5TokenizerFast](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast).
184
191
  """
185
192
 
186
- model_cpu_offload_seq = "text_encoder->text_encoder_2->transformer->vae"
187
- _optional_components = []
193
+ model_cpu_offload_seq = "text_encoder->text_encoder_2->image_encoder->transformer->vae"
194
+ _optional_components = ["image_encoder", "feature_extractor"]
188
195
  _callback_tensor_inputs = ["latents", "prompt_embeds"]
189
196
 
190
197
  def __init__(
@@ -196,6 +203,8 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
196
203
  text_encoder_2: T5EncoderModel,
197
204
  tokenizer_2: T5TokenizerFast,
198
205
  transformer: FluxTransformer2DModel,
206
+ image_encoder: CLIPVisionModelWithProjection = None,
207
+ feature_extractor: CLIPImageProcessor = None,
199
208
  ):
200
209
  super().__init__()
201
210
 
@@ -207,16 +216,19 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
207
216
  tokenizer_2=tokenizer_2,
208
217
  transformer=transformer,
209
218
  scheduler=scheduler,
219
+ image_encoder=image_encoder,
220
+ feature_extractor=feature_extractor,
210
221
  )
211
- self.vae_scale_factor = (
212
- 2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
213
- )
222
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
214
223
  # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
215
224
  # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
216
- self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
225
+ self.latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
226
+ self.image_processor = VaeImageProcessor(
227
+ vae_scale_factor=self.vae_scale_factor * 2, vae_latent_channels=self.latent_channels
228
+ )
217
229
  self.mask_processor = VaeImageProcessor(
218
230
  vae_scale_factor=self.vae_scale_factor * 2,
219
- vae_latent_channels=self.vae.config.latent_channels,
231
+ vae_latent_channels=self.latent_channels,
220
232
  do_normalize=False,
221
233
  do_binarize=True,
222
234
  do_convert_grayscale=True,
@@ -401,6 +413,55 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
401
413
 
402
414
  return prompt_embeds, pooled_prompt_embeds, text_ids
403
415
 
416
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.encode_image
417
+ def encode_image(self, image, device, num_images_per_prompt):
418
+ dtype = next(self.image_encoder.parameters()).dtype
419
+
420
+ if not isinstance(image, torch.Tensor):
421
+ image = self.feature_extractor(image, return_tensors="pt").pixel_values
422
+
423
+ image = image.to(device=device, dtype=dtype)
424
+ image_embeds = self.image_encoder(image).image_embeds
425
+ image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
426
+ return image_embeds
427
+
428
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.prepare_ip_adapter_image_embeds
429
+ def prepare_ip_adapter_image_embeds(
430
+ self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt
431
+ ):
432
+ image_embeds = []
433
+ if ip_adapter_image_embeds is None:
434
+ if not isinstance(ip_adapter_image, list):
435
+ ip_adapter_image = [ip_adapter_image]
436
+
437
+ if len(ip_adapter_image) != self.transformer.encoder_hid_proj.num_ip_adapters:
438
+ raise ValueError(
439
+ f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {self.transformer.encoder_hid_proj.num_ip_adapters} IP Adapters."
440
+ )
441
+
442
+ for single_ip_adapter_image in ip_adapter_image:
443
+ single_image_embeds = self.encode_image(single_ip_adapter_image, device, 1)
444
+ image_embeds.append(single_image_embeds[None, :])
445
+ else:
446
+ if not isinstance(ip_adapter_image_embeds, list):
447
+ ip_adapter_image_embeds = [ip_adapter_image_embeds]
448
+
449
+ if len(ip_adapter_image_embeds) != self.transformer.encoder_hid_proj.num_ip_adapters:
450
+ raise ValueError(
451
+ f"`ip_adapter_image_embeds` must have same length as the number of IP Adapters. Got {len(ip_adapter_image_embeds)} image embeds and {self.transformer.encoder_hid_proj.num_ip_adapters} IP Adapters."
452
+ )
453
+
454
+ for single_image_embeds in ip_adapter_image_embeds:
455
+ image_embeds.append(single_image_embeds)
456
+
457
+ ip_adapter_image_embeds = []
458
+ for single_image_embeds in image_embeds:
459
+ single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
460
+ single_image_embeds = single_image_embeds.to(device=device)
461
+ ip_adapter_image_embeds.append(single_image_embeds)
462
+
463
+ return ip_adapter_image_embeds
464
+
404
465
  # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_inpaint.StableDiffusion3InpaintPipeline._encode_vae_image
405
466
  def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):
406
467
  if isinstance(generator, list):
@@ -438,8 +499,12 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
438
499
  height,
439
500
  width,
440
501
  output_type,
502
+ negative_prompt=None,
503
+ negative_prompt_2=None,
441
504
  prompt_embeds=None,
505
+ negative_prompt_embeds=None,
442
506
  pooled_prompt_embeds=None,
507
+ negative_pooled_prompt_embeds=None,
443
508
  callback_on_step_end_tensor_inputs=None,
444
509
  padding_mask_crop=None,
445
510
  max_sequence_length=None,
@@ -478,15 +543,38 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
478
543
  elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
479
544
  raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
480
545
 
546
+ if negative_prompt is not None and negative_prompt_embeds is not None:
547
+ raise ValueError(
548
+ f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
549
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
550
+ )
551
+ elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
552
+ raise ValueError(
553
+ f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
554
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
555
+ )
556
+
557
+ if prompt_embeds is not None and negative_prompt_embeds is not None:
558
+ if prompt_embeds.shape != negative_prompt_embeds.shape:
559
+ raise ValueError(
560
+ "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
561
+ f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
562
+ f" {negative_prompt_embeds.shape}."
563
+ )
564
+
481
565
  if prompt_embeds is not None and pooled_prompt_embeds is None:
482
566
  raise ValueError(
483
567
  "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
484
568
  )
569
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
570
+ raise ValueError(
571
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
572
+ )
485
573
 
486
574
  if padding_mask_crop is not None:
487
575
  if not isinstance(image, PIL.Image.Image):
488
576
  raise ValueError(
489
- f"The image should be a PIL image when inpainting mask crop, but is of type" f" {type(image)}."
577
+ f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
490
578
  )
491
579
  if not isinstance(mask_image, PIL.Image.Image):
492
580
  raise ValueError(
@@ -494,7 +582,7 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
494
582
  f" {type(mask_image)}."
495
583
  )
496
584
  if output_type != "pil":
497
- raise ValueError(f"The output type should be PIL when inpainting mask crop, but is" f" {output_type}.")
585
+ raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")
498
586
 
499
587
  if max_sequence_length is not None and max_sequence_length > 512:
500
588
  raise ValueError(f"`max_sequence_length` cannot be greater than 512 but is {max_sequence_length}")
@@ -567,7 +655,10 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
567
655
  latent_image_ids = self._prepare_latent_image_ids(batch_size, height // 2, width // 2, device, dtype)
568
656
 
569
657
  image = image.to(device=device, dtype=dtype)
570
- image_latents = self._encode_vae_image(image=image, generator=generator)
658
+ if image.shape[1] != self.latent_channels:
659
+ image_latents = self._encode_vae_image(image=image, generator=generator)
660
+ else:
661
+ image_latents = image
571
662
 
572
663
  if batch_size > image_latents.shape[0] and batch_size % image_latents.shape[0] == 0:
573
664
  # expand init_latents for batch_size
@@ -624,7 +715,9 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
624
715
  else:
625
716
  masked_image_latents = retrieve_latents(self.vae.encode(masked_image), generator=generator)
626
717
 
627
- masked_image_latents = (masked_image_latents - self.vae.config.shift_factor) * self.vae.config.scaling_factor
718
+ masked_image_latents = (
719
+ masked_image_latents - self.vae.config.shift_factor
720
+ ) * self.vae.config.scaling_factor
628
721
 
629
722
  # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method
630
723
  if mask.shape[0] < batch_size:
@@ -685,6 +778,9 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
685
778
  self,
686
779
  prompt: Union[str, List[str]] = None,
687
780
  prompt_2: Optional[Union[str, List[str]]] = None,
781
+ negative_prompt: Union[str, List[str]] = None,
782
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
783
+ true_cfg_scale: float = 1.0,
688
784
  image: PipelineImageInput = None,
689
785
  mask_image: PipelineImageInput = None,
690
786
  masked_image_latents: PipelineImageInput = None,
@@ -700,6 +796,12 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
700
796
  latents: Optional[torch.FloatTensor] = None,
701
797
  prompt_embeds: Optional[torch.FloatTensor] = None,
702
798
  pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
799
+ ip_adapter_image: Optional[PipelineImageInput] = None,
800
+ ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
801
+ negative_ip_adapter_image: Optional[PipelineImageInput] = None,
802
+ negative_ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
803
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
804
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
703
805
  output_type: Optional[str] = "pil",
704
806
  return_dict: bool = True,
705
807
  joint_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -778,6 +880,17 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
778
880
  pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
779
881
  Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
780
882
  If not provided, pooled text embeddings will be generated from `prompt` input argument.
883
+ ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
884
+ ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
885
+ Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
886
+ IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. If not
887
+ provided, embeddings are computed from the `ip_adapter_image` input argument.
888
+ negative_ip_adapter_image:
889
+ (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
890
+ negative_ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
891
+ Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
892
+ IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. If not
893
+ provided, embeddings are computed from the `ip_adapter_image` input argument.
781
894
  output_type (`str`, *optional*, defaults to `"pil"`):
782
895
  The output format of the generate image. Choose between
783
896
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -819,8 +932,12 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
819
932
  height,
820
933
  width,
821
934
  output_type=output_type,
935
+ negative_prompt=negative_prompt,
936
+ negative_prompt_2=negative_prompt_2,
822
937
  prompt_embeds=prompt_embeds,
938
+ negative_prompt_embeds=negative_prompt_embeds,
823
939
  pooled_prompt_embeds=pooled_prompt_embeds,
940
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
824
941
  callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
825
942
  padding_mask_crop=padding_mask_crop,
826
943
  max_sequence_length=max_sequence_length,
@@ -857,6 +974,7 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
857
974
  lora_scale = (
858
975
  self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
859
976
  )
977
+ do_true_cfg = true_cfg_scale > 1 and negative_prompt is not None
860
978
  (
861
979
  prompt_embeds,
862
980
  pooled_prompt_embeds,
@@ -871,16 +989,31 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
871
989
  max_sequence_length=max_sequence_length,
872
990
  lora_scale=lora_scale,
873
991
  )
992
+ if do_true_cfg:
993
+ (
994
+ negative_prompt_embeds,
995
+ negative_pooled_prompt_embeds,
996
+ _,
997
+ ) = self.encode_prompt(
998
+ prompt=negative_prompt,
999
+ prompt_2=negative_prompt_2,
1000
+ prompt_embeds=negative_prompt_embeds,
1001
+ pooled_prompt_embeds=negative_pooled_prompt_embeds,
1002
+ device=device,
1003
+ num_images_per_prompt=num_images_per_prompt,
1004
+ max_sequence_length=max_sequence_length,
1005
+ lora_scale=lora_scale,
1006
+ )
874
1007
 
875
1008
  # 4.Prepare timesteps
876
1009
  sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps) if sigmas is None else sigmas
877
1010
  image_seq_len = (int(height) // self.vae_scale_factor // 2) * (int(width) // self.vae_scale_factor // 2)
878
1011
  mu = calculate_shift(
879
1012
  image_seq_len,
880
- self.scheduler.config.base_image_seq_len,
881
- self.scheduler.config.max_image_seq_len,
882
- self.scheduler.config.base_shift,
883
- self.scheduler.config.max_shift,
1013
+ self.scheduler.config.get("base_image_seq_len", 256),
1014
+ self.scheduler.config.get("max_image_seq_len", 4096),
1015
+ self.scheduler.config.get("base_shift", 0.5),
1016
+ self.scheduler.config.get("max_shift", 1.15),
884
1017
  )
885
1018
  timesteps, num_inference_steps = retrieve_timesteps(
886
1019
  self.scheduler,
@@ -947,12 +1080,43 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
947
1080
  else:
948
1081
  guidance = None
949
1082
 
1083
+ if (ip_adapter_image is not None or ip_adapter_image_embeds is not None) and (
1084
+ negative_ip_adapter_image is None and negative_ip_adapter_image_embeds is None
1085
+ ):
1086
+ negative_ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
1087
+ elif (ip_adapter_image is None and ip_adapter_image_embeds is None) and (
1088
+ negative_ip_adapter_image is not None or negative_ip_adapter_image_embeds is not None
1089
+ ):
1090
+ ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
1091
+
1092
+ if self.joint_attention_kwargs is None:
1093
+ self._joint_attention_kwargs = {}
1094
+
1095
+ image_embeds = None
1096
+ negative_image_embeds = None
1097
+ if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
1098
+ image_embeds = self.prepare_ip_adapter_image_embeds(
1099
+ ip_adapter_image,
1100
+ ip_adapter_image_embeds,
1101
+ device,
1102
+ batch_size * num_images_per_prompt,
1103
+ )
1104
+ if negative_ip_adapter_image is not None or negative_ip_adapter_image_embeds is not None:
1105
+ negative_image_embeds = self.prepare_ip_adapter_image_embeds(
1106
+ negative_ip_adapter_image,
1107
+ negative_ip_adapter_image_embeds,
1108
+ device,
1109
+ batch_size * num_images_per_prompt,
1110
+ )
1111
+
950
1112
  # 6. Denoising loop
951
1113
  with self.progress_bar(total=num_inference_steps) as progress_bar:
952
1114
  for i, t in enumerate(timesteps):
953
1115
  if self.interrupt:
954
1116
  continue
955
1117
 
1118
+ if image_embeds is not None:
1119
+ self._joint_attention_kwargs["ip_adapter_image_embeds"] = image_embeds
956
1120
  # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
957
1121
  timestep = t.expand(latents.shape[0]).to(latents.dtype)
958
1122
  noise_pred = self.transformer(
@@ -967,6 +1131,22 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
967
1131
  return_dict=False,
968
1132
  )[0]
969
1133
 
1134
+ if do_true_cfg:
1135
+ if negative_image_embeds is not None:
1136
+ self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
1137
+ neg_noise_pred = self.transformer(
1138
+ hidden_states=latents,
1139
+ timestep=timestep / 1000,
1140
+ guidance=guidance,
1141
+ pooled_projections=negative_pooled_prompt_embeds,
1142
+ encoder_hidden_states=negative_prompt_embeds,
1143
+ txt_ids=text_ids,
1144
+ img_ids=latent_image_ids,
1145
+ joint_attention_kwargs=self.joint_attention_kwargs,
1146
+ return_dict=False,
1147
+ )[0]
1148
+ noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)
1149
+
970
1150
  # compute the previous noisy sample x_t -> x_t-1
971
1151
  latents_dtype = latents.dtype
972
1152
  latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
@@ -341,9 +341,9 @@ class AnimateDiffFreeNoiseMixin:
341
341
  start_tensor = negative_prompt_embeds[i].unsqueeze(0)
342
342
  end_tensor = negative_prompt_embeds[i + 1].unsqueeze(0)
343
343
 
344
- negative_prompt_interpolation_embeds[
345
- start_frame : end_frame + 1
346
- ] = self._free_noise_prompt_interpolation_callback(start_frame, end_frame, start_tensor, end_tensor)
344
+ negative_prompt_interpolation_embeds[start_frame : end_frame + 1] = (
345
+ self._free_noise_prompt_interpolation_callback(start_frame, end_frame, start_tensor, end_tensor)
346
+ )
347
347
 
348
348
  prompt_embeds = prompt_interpolation_embeds
349
349
  negative_prompt_embeds = negative_prompt_interpolation_embeds
@@ -22,7 +22,9 @@ except OptionalDependencyNotAvailable:
22
22
 
23
23
  _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
24
24
  else:
25
+ _import_structure["pipeline_hunyuan_skyreels_image2video"] = ["HunyuanSkyreelsImageToVideoPipeline"]
25
26
  _import_structure["pipeline_hunyuan_video"] = ["HunyuanVideoPipeline"]
27
+ _import_structure["pipeline_hunyuan_video_image2video"] = ["HunyuanVideoImageToVideoPipeline"]
26
28
 
27
29
  if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
28
30
  try:
@@ -32,7 +34,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
32
34
  except OptionalDependencyNotAvailable:
33
35
  from ...utils.dummy_torch_and_transformers_objects import *
34
36
  else:
37
+ from .pipeline_hunyuan_skyreels_image2video import HunyuanSkyreelsImageToVideoPipeline
35
38
  from .pipeline_hunyuan_video import HunyuanVideoPipeline
39
+ from .pipeline_hunyuan_video_image2video import HunyuanVideoImageToVideoPipeline
36
40
 
37
41
  else:
38
42
  import sys