diffusers 0.32.2__py3-none-any.whl → 0.33.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +595 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +724 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +727 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/METADATA +21 -4
  384. diffusers-0.33.1.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/top_level.txt +0 -0
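The headline additions in 0.33 are visible in the list above: a new `diffusers/hooks/` package (group offloading, layerwise casting, pyramid attention broadcast, FasterCache), a batch of new pipelines (Wan, Lumina2, CogView4, EasyAnimate, OmniGen, ConsisID, SANA-Sprint), a Quanto quantization backend, and `diffusers/models/auto_model.py`, which adds an `AutoModel` class that resolves the concrete model class from a checkpoint's config. A minimal sketch of how that entry point is used, assuming the `from_pretrained(..., subfolder=...)` signature shared by the other diffusers loaders (the checkpoint id is illustrative):

```python
# Sketch under the assumptions above; requires diffusers >= 0.33.
import torch
from diffusers import AutoModel

# AutoModel reads the config stored in the repo/subfolder and dispatches to
# the matching class (here it would resolve to UNet2DConditionModel).
unet = AutoModel.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    subfolder="unet",
    torch_dtype=torch.float16,
)
```

The hunks reproduced below come from the stable_diffusion pipeline family (entries 284-291 above).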
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
@@ -28,11 +28,26 @@ from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMix
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    PIL_INTERPOLATION,
+    USE_PEFT_BACKEND,
+    deprecate,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -115,17 +130,21 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
     ):
         super().__init__()

-        is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse(
-            version.parse(unet.config._diffusers_version).base_version
-        ) < version.parse("0.9.0.dev0")
-        is_unet_sample_size_less_64 = hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5"
-                " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -145,7 +164,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
             depth_estimator=depth_estimator,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
@@ -861,6 +880,9 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         else:
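The `xm.mark_step()` added at the end of each denoising iteration is the recurring change of this release: on XLA devices it flushes the lazily-built graph once per step, so TPU execution is dispatched incrementally instead of materializing the whole loop at the end. The guarded-import pattern, reduced to a standalone sketch (diffusers routes the check through its own `is_torch_xla_available()` helper; the try/except here is a stand-in so the sketch has no diffusers dependency, and it degrades to a no-op without torch_xla installed):

```python
# Standalone sketch of the guard added across the pipelines.
try:
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
except ImportError:
    XLA_AVAILABLE = False


def denoising_loop(steps: int) -> None:
    for i in range(steps):
        ...  # scheduler.step(...), callbacks, etc.
        if XLA_AVAILABLE:
            # Flush the pending lazy-tensor graph so this step executes now.
            xm.mark_step()
```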
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
@@ -24,13 +24,20 @@ from ...configuration_utils import FrozenDict
 from ...image_processor import VaeImageProcessor
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
+from ...utils import deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -57,8 +64,8 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -97,17 +104,21 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )

-        is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse(
-            version.parse(unet.config._diffusers_version).base_version
-        ) < version.parse("0.9.0.dev0")
-        is_unet_sample_size_less_64 = hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5"
-                " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -126,7 +137,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
             safety_checker=safety_checker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

@@ -401,6 +412,9 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         self.maybe_free_model_hooks()

         if not output_type == "latent":
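The new `else 8` fallback makes `vae_scale_factor` well-defined when the pipeline is constructed without a VAE. The default of 8 matches the standard Stable Diffusion VAE, whose config lists four `block_out_channels`, i.e. three 2x downsampling stages:

```python
# Worked example of the fallback value: the SD v1.x VAE config has
# block_out_channels = [128, 256, 512, 512], i.e. three 2x downsamples.
block_out_channels = [128, 256, 512, 512]
vae_scale_factor = 2 ** (len(block_out_channels) - 1)
assert vae_scale_factor == 8  # 512px images -> 64x64 latents
```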
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -32,6 +32,7 @@ from ...utils import (
     PIL_INTERPOLATION,
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -43,8 +44,16 @@ from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -56,7 +65,7 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers import StableDiffusionImg2ImgPipeline

         >>> device = "cuda"
-        >>> model_id_or_path = "runwayml/stable-diffusion-v1-5"
+        >>> model_id_or_path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
         >>> pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
         >>> pipe = pipe.to(device)

@@ -205,8 +214,8 @@ class StableDiffusionImg2ImgPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -230,7 +239,7 @@ class StableDiffusionImg2ImgPipeline(
     ):
         super().__init__()

-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -244,7 +253,7 @@ class StableDiffusionImg2ImgPipeline(
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)

-        if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True:
+        if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"
@@ -273,17 +282,21 @@ class StableDiffusionImg2ImgPipeline(
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )

-        is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse(
-            version.parse(unet.config._diffusers_version).base_version
-        ) < version.parse("0.9.0.dev0")
-        is_unet_sample_size_less_64 = hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely. If your checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5"
-                " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -304,7 +317,7 @@ class StableDiffusionImg2ImgPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

@@ -1120,6 +1133,9 @@ class StableDiffusionImg2ImgPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
                 0
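Note the shape of the scheduler fixes above: `hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1` becomes `scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1`. Behaviour is identical for existing configs, but the pipeline now also tolerates `scheduler=None` and configs that simply omit the key. A reduced sketch of the pattern, with hypothetical stand-in classes:

```python
# Hypothetical stand-ins for a pipeline component and its config, showing
# why getattr-with-default subsumes the old hasattr check.
class Config:
    pass  # no steps_offset attribute at all


class Scheduler:
    config = Config()


for scheduler in (None, Scheduler()):
    # The old form raises AttributeError for scheduler=None and needs a
    # separate hasattr() for missing keys; this form handles both.
    outdated = scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1
    print(outdated)  # False, False
```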
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -27,13 +27,27 @@ from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraL
 from ...models import AsymmetricAutoencoderKL, AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -146,8 +160,8 @@ class StableDiffusionInpaintPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -171,7 +185,7 @@ class StableDiffusionInpaintPipeline(
     ):
         super().__init__()

-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -185,7 +199,7 @@ class StableDiffusionInpaintPipeline(
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)

-        if hasattr(scheduler.config, "skip_prk_steps") and scheduler.config.skip_prk_steps is False:
+        if scheduler is not None and getattr(scheduler.config, "skip_prk_steps", True) is False:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration"
                 " `skip_prk_steps`. `skip_prk_steps` should be set to True in the configuration file. Please make"
@@ -215,17 +229,21 @@ class StableDiffusionInpaintPipeline(
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )

-        is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse(
-            version.parse(unet.config._diffusers_version).base_version
-        ) < version.parse("0.9.0.dev0")
-        is_unet_sample_size_less_64 = hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5"
-                " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -237,7 +255,7 @@ class StableDiffusionInpaintPipeline(
             unet._internal_dict = FrozenDict(new_config)

         # Check shapes, assume num_channels_latents == 4, num_channels_mask == 1, num_channels_masked == 4
-        if unet.config.in_channels != 9:
+        if unet is not None and unet.config.in_channels != 9:
             logger.info(f"You have loaded a UNet with {unet.config.in_channels} input channels which.")

         self.register_modules(
@@ -250,7 +268,7 @@ class StableDiffusionInpaintPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
@@ -642,7 +660,7 @@ class StableDiffusionInpaintPipeline(
         if padding_mask_crop is not None:
             if not isinstance(image, PIL.Image.Image):
                 raise ValueError(
-                    f"The image should be a PIL image when inpainting mask crop, but is of type" f" {type(image)}."
+                    f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
                 )
             if not isinstance(mask_image, PIL.Image.Image):
                 raise ValueError(
@@ -650,7 +668,7 @@ class StableDiffusionInpaintPipeline(
                     f" {type(mask_image)}."
                 )
             if output_type != "pil":
-                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is" f" {output_type}.")
+                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")

         if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
             raise ValueError(
@@ -1014,7 +1032,7 @@ class StableDiffusionInpaintPipeline(
         >>> mask_image = download_image(mask_url).resize((512, 512))

         >>> pipe = StableDiffusionInpaintPipeline.from_pretrained(
-        ...     "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
+        ...     "stable-diffusion-v1-5/stable-diffusion-inpainting", torch_dtype=torch.float16
         ... )
         >>> pipe = pipe.to("cuda")

@@ -1200,7 +1218,7 @@ class StableDiffusionInpaintPipeline(

         # 8. Check that sizes of mask, masked image and latents match
         if num_channels_unet == 9:
-            # default case for runwayml/stable-diffusion-inpainting
+            # default case for stable-diffusion-v1-5/stable-diffusion-inpainting
             num_channels_mask = mask.shape[1]
             num_channels_masked_image = masked_image_latents.shape[1]
             if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels:
@@ -1208,7 +1226,7 @@ class StableDiffusionInpaintPipeline(
                     f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                     f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                     f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
-                    f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
+                    f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
                     " `pipeline.unet` or your `mask_image` or `image` input."
                 )
             elif num_channels_unet != 4:
@@ -1303,6 +1321,9 @@ class StableDiffusionInpaintPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             condition_kwargs = {}
             if isinstance(self.vae, AsymmetricAutoencoderKL):
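The 9-channel check that gains the `unet is not None` guard encodes the inpainting UNet's input layout: 4 latent channels + 1 mask channel + 4 masked-image latent channels. A shapes-only sketch of the concatenation the pipeline performs before each UNet call (the spatial sizes are hypothetical):

```python
# Shapes-only illustration of the 4 + 1 + 4 = 9 input channels expected by
# stable-diffusion-v1-5/stable-diffusion-inpainting.
import torch

latents = torch.randn(1, 4, 64, 64)               # noisy image latents
mask = torch.randn(1, 1, 64, 64)                  # downsampled inpaint mask
masked_image_latents = torch.randn(1, 4, 64, 64)  # VAE-encoded masked image

unet_input = torch.cat([latents, mask, masked_image_latents], dim=1)
assert unet_input.shape[1] == 9  # must equal unet.config.in_channels
```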
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
@@ -22,16 +22,23 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV

 from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, deprecate, logging
+from ...utils import PIL_INTERPOLATION, deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -79,6 +86,7 @@ class StableDiffusionInstructPix2PixPipeline(
     TextualInversionLoaderMixin,
     StableDiffusionLoraLoaderMixin,
     IPAdapterMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion).
@@ -106,8 +114,8 @@ class StableDiffusionInstructPix2PixPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -157,7 +165,7 @@ class StableDiffusionInstructPix2PixPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

@@ -393,7 +401,7 @@ class StableDiffusionInstructPix2PixPipeline(
                 f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                 f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                 f" `num_channels_image`: {num_channels_image} "
-                f" = {num_channels_latents+num_channels_image}. Please verify the config of"
+                f" = {num_channels_latents + num_channels_image}. Please verify the config of"
                 " `pipeline.unet` or your `image` input."
             )

@@ -457,6 +465,9 @@ class StableDiffusionInstructPix2PixPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
             image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
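Beyond the shared XLA and f-string cleanups, this pipeline gains `FromSingleFileMixin`, so a monolithic `.safetensors` checkpoint can be loaded directly via the `from_single_file` loader that mixin provides. A hedged usage sketch; the checkpoint path is illustrative, not a known published file:

```python
# Sketch: FromSingleFileMixin adds .from_single_file() to the pipeline class.
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline

pipe = StableDiffusionInstructPix2PixPipeline.from_single_file(
    "path/to/instruct-pix2pix.safetensors",  # illustrative local checkpoint
    torch_dtype=torch.float16,
)
```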
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
@@ -25,11 +25,18 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import EulerDiscreteScheduler
-from ...utils import deprecate, logging
+from ...utils import deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffusionMixin


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -116,7 +123,7 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMix
             unet=unet,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")

     def _encode_prompt(
@@ -593,7 +600,7 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMix
                 f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                 f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                 f" `num_channels_image`: {num_channels_image} "
-                f" = {num_channels_latents+num_channels_image}. Please verify the config of"
+                f" = {num_channels_latents + num_channels_image}. Please verify the config of"
                 " `pipeline.unet` or your `image` input."
             )

@@ -640,6 +647,9 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMix
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         else:
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -30,12 +30,26 @@ from ...models.attention_processor import (
 )
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
-from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -149,7 +163,7 @@ class StableDiffusionUpscalePipeline(
             watermarker=watermarker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")
         self.register_to_config(max_noise_level=max_noise_level)

@@ -726,7 +740,7 @@ class StableDiffusionUpscalePipeline(
                 f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                 f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                 f" `num_channels_image`: {num_channels_image} "
-                f" = {num_channels_latents+num_channels_image}. Please verify the config of"
+                f" = {num_channels_latents + num_channels_image}. Please verify the config of"
                 " `pipeline.unet` or your `image` input."
             )

@@ -769,6 +783,9 @@ class StableDiffusionUpscalePipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             # make sure the VAE is in float32 mode, as it overflows in float16
             needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
@@ -28,6 +28,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -38,8 +39,16 @@ from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffu
 from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -132,7 +141,7 @@ class StableUnCLIPPipeline(
         image_noising_scheduler: KarrasDiffusionSchedulers,
         # regular denoising components
         tokenizer: CLIPTokenizer,
-        text_encoder: CLIPTextModelWithProjection,
+        text_encoder: CLIPTextModel,
         unet: UNet2DConditionModel,
         scheduler: KarrasDiffusionSchedulers,
         # vae
@@ -154,7 +163,7 @@ class StableUnCLIPPipeline(
             vae=vae,
         )

-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt with _encode_prompt->_encode_prior_prompt, tokenizer->prior_tokenizer, text_encoder->prior_text_encoder
@@ -924,6 +933,9 @@ class StableUnCLIPPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         else:
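One change repeats through every file above: references to the retired `runwayml/stable-diffusion-v1-5` and `runwayml/stable-diffusion-inpainting` Hub repos (docstrings, doc examples, comments) are repointed at the `stable-diffusion-v1-5` mirror organization. Downstream code pinned to the old ids should migrate the same way; a minimal before/after, assuming the mirror hosts the same weights:

```python
# Before (repo id no longer available on the Hugging Face Hub):
#   pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
# After (the id diffusers 0.33 uses in its docs and messages):
from diffusers import StableDiffusionImg2ImgPipeline

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5"
)
```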