diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +198 -28
  13. diffusers/loaders/lora_conversion_utils.py +679 -44
  14. diffusers/loaders/lora_pipeline.py +1963 -801
  15. diffusers/loaders/peft.py +169 -84
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +653 -75
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +22 -32
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +10 -2
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +14 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.1.dist-info/RECORD +0 -550
  387. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
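The headline additions in 0.33.0 are visible in the list above: a new diffusers/hooks package (group offloading, layerwise casting, pyramid attention broadcast, FasterCache), models/auto_model.py and models/cache_utils.py, plus new model families (Wan, Lumina2, OmniGen, CogView4, ConsisID, EasyAnimate, SANA-Sprint). A minimal sketch of the two memory hooks follows; the method and parameter names are read off the new module names above and are assumptions, not verified against the released API:

# Sketch only: exercises the new 0.33.0 memory hooks. Exact names and
# signatures are assumptions inferred from the hooks/ modules listed above.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# hooks/layerwise_casting.py: store weights in a low-precision dtype and
# upcast each layer to the compute dtype only while it runs.
pipe.unet.enable_layerwise_casting(
    storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.float16
)

# hooks/group_offloading.py: keep groups of blocks on the CPU and move each
# group onto the accelerator just before it executes. Typically you would
# pick one hook per module rather than stacking both as shown here.
from diffusers.hooks import apply_group_offloading

apply_group_offloading(
    pipe.unet,
    onload_device=torch.device("cuda"),
    offload_device=torch.device("cpu"),
    offload_type="block_level",
    num_blocks_per_group=2,
)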
diffusers/pipelines/consisid/pipeline_output.py
@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+
+import torch
+
+from diffusers.utils import BaseOutput
+
+
+@dataclass
+class ConsisIDPipelineOutput(BaseOutput):
+    r"""
+    Output class for ConsisID pipelines.
+
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing
+            denoised PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`.
+    """
+
+    frames: torch.Tensor
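The new output class follows the BaseOutput convention used throughout diffusers: a dataclass whose fields can be read by attribute, by key, or positionally. A small usage sketch with a dummy tensor standing in for real pipeline output:

import torch

from diffusers.pipelines.consisid.pipeline_output import ConsisIDPipelineOutput

# BaseOutput subclasses act like a dataclass and an ordered mapping at once.
out = ConsisIDPipelineOutput(frames=torch.zeros(1, 8, 3, 64, 64))
assert out.frames is out["frames"]      # attribute and key access agree
assert out.to_tuple()[0] is out.frames  # positional access via to_tuple()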
diffusers/pipelines/consistency_models/pipeline_consistency_models.py
@@ -19,6 +19,7 @@ import torch
 from ...models import UNet2DModel
 from ...schedulers import CMStochasticIterativeScheduler
 from ...utils import (
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -26,6 +27,13 @@ from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -263,6 +271,9 @@ class ConsistencyModelPipeline(DiffusionPipeline):
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, sample)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # 6. Post-process image sample
         image = self.postprocess_image(sample, output_type=output_type)

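This is the first of many identical edits in this release: pipelines now import is_torch_xla_available and call xm.mark_step() once per denoising step, which cuts the lazily traced XLA graph so a TPU executes each step as it completes instead of deferring the whole loop. The pattern in isolation, with a toy loop standing in for the pipelines' __call__ bodies:

from diffusers.utils import is_torch_xla_available

if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False


def denoise(unet, scheduler, sample, timesteps):
    # Toy stand-in for a pipeline __call__ loop: mark_step() after each
    # iteration materializes that step's XLA graph before the next begins.
    for t in timesteps:
        model_output = unet(sample, t, return_dict=False)[0]
        sample = scheduler.step(model_output, t, sample, return_dict=False)[0]
        if XLA_AVAILABLE:
            xm.mark_step()
    return sample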
diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -80,7 +80,7 @@ EXAMPLE_DOC_STRING = """
         >>> # load control net and stable diffusion v1-5
         >>> controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
         >>> pipe = StableDiffusionControlNetPipeline.from_pretrained(
-        ...     "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
         ... )

         >>> # speed up diffusion process with faster scheduler and memory optimization
@@ -198,8 +198,8 @@ class StableDiffusionControlNetPipeline(
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
        safety_checker ([`StableDiffusionSafetyChecker`]):
            Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
@@ -207,7 +207,7 @@ class StableDiffusionControlNetPipeline(
     model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
     _exclude_from_cpu_offload = ["safety_checker"]
-    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
+    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds", "image"]

     def __init__(
         self,
@@ -254,7 +254,7 @@ class StableDiffusionControlNetPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -1323,6 +1323,7 @@ class StableDiffusionControlNetPipeline(
                    latents = callback_outputs.pop("latents", latents)
                    prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                    negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                    image = callback_outputs.pop("image", image)

                # call the callback, if provided
                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
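Adding "image" to _callback_tensor_inputs (and popping it back out of callback_outputs) means a callback_on_step_end hook can now replace the ControlNet conditioning image mid-run, not just latents and the prompt embeddings. A hedged usage sketch; the linear fade is illustrative, not from the release, and it assumes `pipe` and `canny_image` set up as in the docstring example above and the `num_timesteps` property the SD pipelines expose:

def fade_control_image(pipe, step_index, timestep, callback_kwargs):
    # "image" is the preprocessed conditioning tensor; returning a scaled
    # copy weakens the ControlNet guidance as denoising progresses.
    image = callback_kwargs["image"]
    callback_kwargs["image"] = image * (1.0 - step_index / pipe.num_timesteps)
    return callback_kwargs


result = pipe(
    prompt="a futuristic city at dusk",
    image=canny_image,
    callback_on_step_end=fade_control_image,
    callback_on_step_end_tensor_inputs=["image"],
)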
diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
@@ -21,6 +21,7 @@ from transformers import CLIPTokenizer
 from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
 from ...schedulers import PNDMScheduler
 from ...utils import (
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -31,8 +32,16 @@ from ..blip_diffusion.modeling_ctx_clip import ContextCLIPTextModel
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py
@@ -401,6 +410,10 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
                t,
                latents,
            )["prev_sample"]
+
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
        image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
        image = self.image_processor.postprocess(image, output_type=output_type)

diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -30,6 +30,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -41,6 +42,13 @@ from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -71,7 +79,7 @@ EXAMPLE_DOC_STRING = """
         >>> # load control net and stable diffusion v1-5
         >>> controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
         >>> pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-        ...     "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
         ... )

         >>> # speed up diffusion process with faster scheduler and memory optimization
@@ -168,8 +176,8 @@ class StableDiffusionControlNetImg2ImgPipeline(
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
        safety_checker ([`StableDiffusionSafetyChecker`]):
            Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
@@ -177,7 +185,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
     model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
     _exclude_from_cpu_offload = ["safety_checker"]
-    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
+    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds", "control_image"]

     def __init__(
         self,
@@ -224,7 +232,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -1286,6 +1294,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
                    latents = callback_outputs.pop("latents", latents)
                    prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                    negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                    control_image = callback_outputs.pop("control_image", control_image)

                # call the callback, if provided
                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -1294,6 +1303,9 @@ class StableDiffusionControlNetImg2ImgPipeline(
                        step_idx = i // getattr(self.scheduler, "order", 1)
                        callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
        # If we do sequential model offloading, let's offload unet and controlnet
        # manually for max memory savings
        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
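The recurring getattr(self, "vae", None) guard in these __init__ hunks lets a pipeline be constructed with vae=None and falls back to 8, which is exactly what the formula yields for the standard Stable Diffusion VAE config:

# Standard SD VAE: four block_out_channels entries -> three 2x downsamples.
block_out_channels = (128, 256, 512, 512)
vae_scale_factor = 2 ** (len(block_out_channels) - 1)
assert vae_scale_factor == 8  # matches the new fallback when vae is None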
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -32,6 +32,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -43,6 +44,13 @@ from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -83,7 +91,7 @@ EXAMPLE_DOC_STRING = """
         ...     "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16
         ... )
         >>> pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
-        ...     "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
         ... )

         >>> pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
@@ -141,11 +149,11 @@ class StableDiffusionControlNetInpaintPipeline(
    <Tip>

    This pipeline can be used with checkpoints that have been specifically fine-tuned for inpainting
-    ([runwayml/stable-diffusion-inpainting](https://huggingface.co/runwayml/stable-diffusion-inpainting)) as well as
-    default text-to-image Stable Diffusion checkpoints
-    ([runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)). Default text-to-image
-    Stable Diffusion checkpoints might be preferable for ControlNets that have been fine-tuned on those, such as
-    [lllyasviel/control_v11p_sd15_inpaint](https://huggingface.co/lllyasviel/control_v11p_sd15_inpaint).
+    ([stable-diffusion-v1-5/stable-diffusion-inpainting](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting))
+    as well as default text-to-image Stable Diffusion checkpoints
+    ([stable-diffusion-v1-5/stable-diffusion-v1-5](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5)).
+    Default text-to-image Stable Diffusion checkpoints might be preferable for ControlNets that have been fine-tuned on
+    those, such as [lllyasviel/control_v11p_sd15_inpaint](https://huggingface.co/lllyasviel/control_v11p_sd15_inpaint).

    </Tip>

@@ -167,8 +175,8 @@ class StableDiffusionControlNetInpaintPipeline(
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
        safety_checker ([`StableDiffusionSafetyChecker`]):
            Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
@@ -176,7 +184,14 @@ class StableDiffusionControlNetInpaintPipeline(
     model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
     _exclude_from_cpu_offload = ["safety_checker"]
-    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
+    _callback_tensor_inputs = [
+        "latents",
+        "prompt_embeds",
+        "negative_prompt_embeds",
+        "control_image",
+        "mask",
+        "masked_image_latents",
+    ]

     def __init__(
         self,
@@ -223,7 +238,7 @@ class StableDiffusionControlNetInpaintPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
@@ -642,7 +657,7 @@ class StableDiffusionControlNetInpaintPipeline(
        if padding_mask_crop is not None:
            if not isinstance(image, PIL.Image.Image):
                raise ValueError(
-                    f"The image should be a PIL image when inpainting mask crop, but is of type" f" {type(image)}."
+                    f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
                )
            if not isinstance(mask_image, PIL.Image.Image):
                raise ValueError(
@@ -650,7 +665,7 @@ class StableDiffusionControlNetInpaintPipeline(
                    f" {type(mask_image)}."
                )
            if output_type != "pil":
-                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is" f" {output_type}.")
+                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")

        # `prompt` needs more sophisticated handling when there are multiple
        # conditionings.
@@ -1468,6 +1483,7 @@ class StableDiffusionControlNetInpaintPipeline(
                    latents = callback_outputs.pop("latents", latents)
                    prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                    negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                    control_image = callback_outputs.pop("control_image", control_image)

                # call the callback, if provided
                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -1476,6 +1492,9 @@ class StableDiffusionControlNetInpaintPipeline(
                        step_idx = i // getattr(self.scheduler, "order", 1)
                        callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
        # If we do sequential model offloading, let's offload unet and controlnet
        # manually for max memory savings
        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
@@ -60,6 +60,16 @@ if is_invisible_watermark_available():
     from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker


+from ...utils import is_torch_xla_available
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -227,6 +237,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
        "add_neg_time_ids",
        "mask",
        "masked_image_latents",
+        "control_image",
    ]

    def __init__(
@@ -264,7 +275,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
        )
        self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
        self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
        self.mask_processor = VaeImageProcessor(
            vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
@@ -406,7 +417,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
                prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                if clip_skip is None:
                    prompt_embeds = prompt_embeds.hidden_states[-2]
                else:
@@ -465,8 +478,10 @@ class StableDiffusionXLControlNetInpaintPipeline(
                    uncond_input.input_ids.to(device),
                    output_hidden_states=True,
                )
+
                # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -729,7 +744,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
        if padding_mask_crop is not None:
            if not isinstance(image, PIL.Image.Image):
                raise ValueError(
-                    f"The image should be a PIL image when inpainting mask crop, but is of type" f" {type(image)}."
+                    f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
                )
            if not isinstance(mask_image, PIL.Image.Image):
                raise ValueError(
@@ -737,7 +752,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                    f" {type(mask_image)}."
                )
            if output_type != "pil":
-                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is" f" {output_type}.")
+                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")

        if prompt_embeds is not None and pooled_prompt_embeds is None:
            raise ValueError(
@@ -1622,7 +1637,7 @@ class StableDiffusionXLControlNetInpaintPipeline(

        # 8. Check that sizes of mask, masked image and latents match
        if num_channels_unet == 9:
-            # default case for runwayml/stable-diffusion-inpainting
+            # default case for stable-diffusion-v1-5/stable-diffusion-inpainting
            num_channels_mask = mask.shape[1]
            num_channels_masked_image = masked_image_latents.shape[1]
            if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels:
@@ -1630,7 +1645,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                    f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                    f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                    f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
-                    f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
+                    f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
                    " `pipeline.unet` or your `mask_image` or `image` input."
                )
            elif num_channels_unet != 4:
@@ -1821,6 +1836,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                    latents = callback_outputs.pop("latents", latents)
                    prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                    negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                    control_image = callback_outputs.pop("control_image", control_image)

                # call the callback, if provided
                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -1829,6 +1845,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
                        step_idx = i // getattr(self.scheduler, "order", 1)
                        callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
        # make sure the VAE is in float32 mode, as it overflows in float16
        if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
            self.upcast_vae()
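The new ndim == 2 checks in the SDXL encode_prompt hunks read as distinguishing the two CLIP text encoders these pipelines accept: CLIPTextModelWithProjection returns the pooled text_embeds (shape (batch, dim)) as its first output, while a plain CLIPTextModel returns the 3-D last_hidden_state first, which must not be captured as pooled embeddings; the is None half also preserves user-supplied pooled_prompt_embeds. A shape sketch, with a small model id chosen only for illustration:

from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
ids = tokenizer(["a photo of a cat"], return_tensors="pt").input_ids

# First output of CLIPTextModelWithProjection is text_embeds: (batch, dim).
with_proj = CLIPTextModelWithProjection.from_pretrained("openai/clip-vit-base-patch32")
assert with_proj(ids, output_hidden_states=True)[0].ndim == 2

# First output of plain CLIPTextModel is last_hidden_state: (batch, seq, dim),
# so without the ndim guard it would be mistaken for pooled embeddings.
plain = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")
assert plain(ids, output_hidden_states=True)[0].ndim == 3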
diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -62,6 +62,16 @@ if is_invisible_watermark_available():
     from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker


+from ...utils import is_torch_xla_available
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -275,7 +285,7 @@ class StableDiffusionXLControlNetPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -415,7 +425,9 @@ class StableDiffusionXLControlNetPipeline(
                prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                if clip_skip is None:
                    prompt_embeds = prompt_embeds.hidden_states[-2]
                else:
@@ -474,8 +486,10 @@ class StableDiffusionXLControlNetPipeline(
                    uncond_input.input_ids.to(device),
                    output_hidden_states=True,
                )
+
                # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -1548,6 +1562,9 @@ class StableDiffusionXLControlNetPipeline(
                        step_idx = i // getattr(self.scheduler, "order", 1)
                        callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
        if not output_type == "latent":
            # make sure the VAE is in float32 mode, as it overflows in float16
            needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
@@ -62,6 +62,16 @@ if is_invisible_watermark_available():
     from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker


+from ...utils import is_torch_xla_available
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -232,6 +242,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
        "add_time_ids",
        "negative_pooled_prompt_embeds",
        "add_neg_time_ids",
+        "control_image",
    ]

    def __init__(
@@ -267,7 +278,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
            feature_extractor=feature_extractor,
            image_encoder=image_encoder,
        )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
        self.control_image_processor = VaeImageProcessor(
            vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -408,7 +419,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                if clip_skip is None:
                    prompt_embeds = prompt_embeds.hidden_states[-2]
                else:
@@ -467,8 +480,10 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                    uncond_input.input_ids.to(device),
                    output_hidden_states=True,
                )
+
                # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -1600,6 +1615,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                    )
                    add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids)
                    add_neg_time_ids = callback_outputs.pop("add_neg_time_ids", add_neg_time_ids)
+                    control_image = callback_outputs.pop("control_image", control_image)

                # call the callback, if provided
                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -1608,6 +1624,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                        step_idx = i // getattr(self.scheduler, "order", 1)
                        callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
        # If we do sequential model offloading, let's offload unet and controlnet
        # manually for max memory savings
        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: