diffusers 0.32.2__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -23,6 +23,7 @@ except OptionalDependencyNotAvailable:
23
23
  else:
24
24
  _import_structure["marigold_image_processing"] = ["MarigoldImageProcessor"]
25
25
  _import_structure["pipeline_marigold_depth"] = ["MarigoldDepthOutput", "MarigoldDepthPipeline"]
26
+ _import_structure["pipeline_marigold_intrinsics"] = ["MarigoldIntrinsicsOutput", "MarigoldIntrinsicsPipeline"]
26
27
  _import_structure["pipeline_marigold_normals"] = ["MarigoldNormalsOutput", "MarigoldNormalsPipeline"]
27
28
 
28
29
  if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
@@ -35,6 +36,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
35
36
  else:
36
37
  from .marigold_image_processing import MarigoldImageProcessor
37
38
  from .pipeline_marigold_depth import MarigoldDepthOutput, MarigoldDepthPipeline
39
+ from .pipeline_marigold_intrinsics import MarigoldIntrinsicsOutput, MarigoldIntrinsicsPipeline
38
40
  from .pipeline_marigold_normals import MarigoldNormalsOutput, MarigoldNormalsPipeline
39
41
 
40
42
  else:
@@ -1,4 +1,22 @@
1
- from typing import List, Optional, Tuple, Union
1
+ # Copyright 2023-2025 Marigold Team, ETH Zürich. All rights reserved.
2
+ # Copyright 2024-2025 The HuggingFace Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # --------------------------------------------------------------------------
16
+ # More information and citation instructions are available on the
17
+ # Marigold project website: https://marigoldcomputervision.github.io
18
+ # --------------------------------------------------------------------------
19
+ from typing import Any, Dict, List, Optional, Tuple, Union
2
20
 
3
21
  import numpy as np
4
22
  import PIL
@@ -379,7 +397,7 @@ class MarigoldImageProcessor(ConfigMixin):
379
397
  val_min: float = 0.0,
380
398
  val_max: float = 1.0,
381
399
  color_map: str = "Spectral",
382
- ) -> Union[PIL.Image.Image, List[PIL.Image.Image]]:
400
+ ) -> List[PIL.Image.Image]:
383
401
  """
384
402
  Visualizes depth maps, such as predictions of the `MarigoldDepthPipeline`.
385
403
 
@@ -391,7 +409,7 @@ class MarigoldImageProcessor(ConfigMixin):
391
409
  color_map (`str`, *optional*, defaults to `"Spectral"`): Color map used to convert a single-channel
392
410
  depth prediction into colored representation.
393
411
 
394
- Returns: `PIL.Image.Image` or `List[PIL.Image.Image]` with depth maps visualization.
412
+ Returns: `List[PIL.Image.Image]` with depth maps visualization.
395
413
  """
396
414
  if val_max <= val_min:
397
415
  raise ValueError(f"Invalid values range: [{val_min}, {val_max}].")
@@ -436,7 +454,7 @@ class MarigoldImageProcessor(ConfigMixin):
436
454
  depth: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
437
455
  val_min: float = 0.0,
438
456
  val_max: float = 1.0,
439
- ) -> Union[PIL.Image.Image, List[PIL.Image.Image]]:
457
+ ) -> List[PIL.Image.Image]:
440
458
  def export_depth_to_16bit_png_one(img, idx=None):
441
459
  prefix = "Depth" + (f"[{idx}]" if idx else "")
442
460
  if not isinstance(img, np.ndarray) and not torch.is_tensor(img):
@@ -478,7 +496,7 @@ class MarigoldImageProcessor(ConfigMixin):
478
496
  flip_x: bool = False,
479
497
  flip_y: bool = False,
480
498
  flip_z: bool = False,
481
- ) -> Union[PIL.Image.Image, List[PIL.Image.Image]]:
499
+ ) -> List[PIL.Image.Image]:
482
500
  """
483
501
  Visualizes surface normals, such as predictions of the `MarigoldNormalsPipeline`.
484
502
 
@@ -492,7 +510,7 @@ class MarigoldImageProcessor(ConfigMixin):
492
510
  flip_z (`bool`, *optional*, defaults to `False`): Flips the Z axis of the normals frame of reference.
493
511
  Default direction is facing the observer.
494
512
 
495
- Returns: `PIL.Image.Image` or `List[PIL.Image.Image]` with surface normals visualization.
513
+ Returns: `List[PIL.Image.Image]` with surface normals visualization.
496
514
  """
497
515
  flip_vec = None
498
516
  if any((flip_x, flip_y, flip_z)):
@@ -528,6 +546,99 @@ class MarigoldImageProcessor(ConfigMixin):
528
546
  else:
529
547
  raise ValueError(f"Unexpected input type: {type(normals)}")
530
548
 
549
+ @staticmethod
550
+ def visualize_intrinsics(
551
+ prediction: Union[
552
+ np.ndarray,
553
+ torch.Tensor,
554
+ List[np.ndarray],
555
+ List[torch.Tensor],
556
+ ],
557
+ target_properties: Dict[str, Any],
558
+ color_map: Union[str, Dict[str, str]] = "binary",
559
+ ) -> List[Dict[str, PIL.Image.Image]]:
560
+ """
561
+ Visualizes intrinsic image decomposition, such as predictions of the `MarigoldIntrinsicsPipeline`.
562
+
563
+ Args:
564
+ prediction (`Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]]`):
565
+ Intrinsic image decomposition.
566
+ target_properties (`Dict[str, Any]`):
567
+ Decomposition properties. Expected entries: `target_names: List[str]` and a dictionary with keys
568
+ `prediction_space: str`, `sub_target_names: List[Union[str, None]]` (must have 3 entries, `None` for
569
+ missing modalities), `up_to_scale: bool`, one for each target and sub-target.
570
+ color_map (`Union[str, Dict[str, str]]`, *optional*, defaults to `"binary"`):
571
+ Color map used to convert a single-channel predictions into colored representations. When a dictionary
572
+ is passed, each modality can be colored with its own color map.
573
+
574
+ Returns: `List[Dict[str, PIL.Image.Image]]` with intrinsic image decomposition visualization.
575
+ """
576
+ if "target_names" not in target_properties:
577
+ raise ValueError("Missing `target_names` in target_properties")
578
+ if not isinstance(color_map, str) and not (
579
+ isinstance(color_map, dict)
580
+ and all(isinstance(k, str) and isinstance(v, str) for k, v in color_map.items())
581
+ ):
582
+ raise ValueError("`color_map` must be a string or a dictionary of strings")
583
+ n_targets = len(target_properties["target_names"])
584
+
585
+ def visualize_targets_one(images, idx=None):
586
+ # img: [T, 3, H, W]
587
+ out = {}
588
+ for target_name, img in zip(target_properties["target_names"], images):
589
+ img = img.permute(1, 2, 0) # [H, W, 3]
590
+ prediction_space = target_properties[target_name].get("prediction_space", "srgb")
591
+ if prediction_space == "stack":
592
+ sub_target_names = target_properties[target_name]["sub_target_names"]
593
+ if len(sub_target_names) != 3 or any(
594
+ not (isinstance(s, str) or s is None) for s in sub_target_names
595
+ ):
596
+ raise ValueError(f"Unexpected target sub-names {sub_target_names} in {target_name}")
597
+ for i, sub_target_name in enumerate(sub_target_names):
598
+ if sub_target_name is None:
599
+ continue
600
+ sub_img = img[:, :, i]
601
+ sub_prediction_space = target_properties[sub_target_name].get("prediction_space", "srgb")
602
+ if sub_prediction_space == "linear":
603
+ sub_up_to_scale = target_properties[sub_target_name].get("up_to_scale", False)
604
+ if sub_up_to_scale:
605
+ sub_img = sub_img / max(sub_img.max().item(), 1e-6)
606
+ sub_img = sub_img ** (1 / 2.2)
607
+ cmap_name = (
608
+ color_map if isinstance(color_map, str) else color_map.get(sub_target_name, "binary")
609
+ )
610
+ sub_img = MarigoldImageProcessor.colormap(sub_img, cmap=cmap_name, bytes=True)
611
+ sub_img = PIL.Image.fromarray(sub_img.cpu().numpy())
612
+ out[sub_target_name] = sub_img
613
+ elif prediction_space == "linear":
614
+ up_to_scale = target_properties[target_name].get("up_to_scale", False)
615
+ if up_to_scale:
616
+ img = img / max(img.max().item(), 1e-6)
617
+ img = img ** (1 / 2.2)
618
+ elif prediction_space == "srgb":
619
+ pass
620
+ img = (img * 255).to(dtype=torch.uint8, device="cpu").numpy()
621
+ img = PIL.Image.fromarray(img)
622
+ out[target_name] = img
623
+ return out
624
+
625
+ if prediction is None or isinstance(prediction, list) and any(o is None for o in prediction):
626
+ raise ValueError("Input prediction is `None`")
627
+ if isinstance(prediction, (np.ndarray, torch.Tensor)):
628
+ prediction = MarigoldImageProcessor.expand_tensor_or_array(prediction)
629
+ if isinstance(prediction, np.ndarray):
630
+ prediction = MarigoldImageProcessor.numpy_to_pt(prediction) # [N*T,3,H,W]
631
+ if not (prediction.ndim == 4 and prediction.shape[1] == 3 and prediction.shape[0] % n_targets == 0):
632
+ raise ValueError(f"Unexpected input shape={prediction.shape}, expecting [N*T,3,H,W].")
633
+ N_T, _, H, W = prediction.shape
634
+ N = N_T // n_targets
635
+ prediction = prediction.reshape(N, n_targets, 3, H, W)
636
+ return [visualize_targets_one(img, idx) for idx, img in enumerate(prediction)]
637
+ elif isinstance(prediction, list):
638
+ return [visualize_targets_one(img, idx) for idx, img in enumerate(prediction)]
639
+ else:
640
+ raise ValueError(f"Unexpected input type: {type(prediction)}")
641
+
531
642
  @staticmethod
532
643
  def visualize_uncertainty(
533
644
  uncertainty: Union[
@@ -537,9 +648,10 @@ class MarigoldImageProcessor(ConfigMixin):
537
648
  List[torch.Tensor],
538
649
  ],
539
650
  saturation_percentile=95,
540
- ) -> Union[PIL.Image.Image, List[PIL.Image.Image]]:
651
+ ) -> List[PIL.Image.Image]:
541
652
  """
542
- Visualizes dense uncertainties, such as produced by `MarigoldDepthPipeline` or `MarigoldNormalsPipeline`.
653
+ Visualizes dense uncertainties, such as produced by `MarigoldDepthPipeline`, `MarigoldNormalsPipeline`, or
654
+ `MarigoldIntrinsicsPipeline`.
543
655
 
544
656
  Args:
545
657
  uncertainty (`Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]]`):
@@ -547,14 +659,15 @@ class MarigoldImageProcessor(ConfigMixin):
547
659
  saturation_percentile (`int`, *optional*, defaults to `95`):
548
660
  Specifies the percentile uncertainty value visualized with maximum intensity.
549
661
 
550
- Returns: `PIL.Image.Image` or `List[PIL.Image.Image]` with uncertainty visualization.
662
+ Returns: `List[PIL.Image.Image]` with uncertainty visualization.
551
663
  """
552
664
 
553
665
  def visualize_uncertainty_one(img, idx=None):
554
666
  prefix = "Uncertainty" + (f"[{idx}]" if idx else "")
555
667
  if img.min() < 0:
556
- raise ValueError(f"{prefix}: unexected data range, min={img.min()}.")
557
- img = img.squeeze(0).cpu().numpy()
668
+ raise ValueError(f"{prefix}: unexpected data range, min={img.min()}.")
669
+ img = img.permute(1, 2, 0) # [H,W,C]
670
+ img = img.squeeze(2).cpu().numpy() # [H,W] or [H,W,3]
558
671
  saturation_value = np.percentile(img, saturation_percentile)
559
672
  img = np.clip(img * 255 / saturation_value, 0, 255)
560
673
  img = img.astype(np.uint8)
@@ -566,9 +679,9 @@ class MarigoldImageProcessor(ConfigMixin):
566
679
  if isinstance(uncertainty, (np.ndarray, torch.Tensor)):
567
680
  uncertainty = MarigoldImageProcessor.expand_tensor_or_array(uncertainty)
568
681
  if isinstance(uncertainty, np.ndarray):
569
- uncertainty = MarigoldImageProcessor.numpy_to_pt(uncertainty) # [N,1,H,W]
570
- if not (uncertainty.ndim == 4 and uncertainty.shape[1] == 1):
571
- raise ValueError(f"Unexpected input shape={uncertainty.shape}, expecting [N,1,H,W].")
682
+ uncertainty = MarigoldImageProcessor.numpy_to_pt(uncertainty) # [N,C,H,W]
683
+ if not (uncertainty.ndim == 4 and uncertainty.shape[1] in (1, 3)):
684
+ raise ValueError(f"Unexpected input shape={uncertainty.shape}, expecting [N,C,H,W] with C in (1,3).")
572
685
  return [visualize_uncertainty_one(img, idx) for idx, img in enumerate(uncertainty)]
573
686
  elif isinstance(uncertainty, list):
574
687
  return [visualize_uncertainty_one(img, idx) for idx, img in enumerate(uncertainty)]
@@ -1,5 +1,5 @@
1
- # Copyright 2024 Marigold authors, PRS ETH Zurich. All rights reserved.
2
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2023-2025 Marigold Team, ETH Zürich. All rights reserved.
2
+ # Copyright 2024-2025 The HuggingFace Team. All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
5
  # you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
14
14
  # limitations under the License.
15
15
  # --------------------------------------------------------------------------
16
16
  # More information and citation instructions are available on the
17
- # Marigold project website: https://marigoldmonodepth.github.io
17
+ # Marigold project website: https://marigoldcomputervision.github.io
18
18
  # --------------------------------------------------------------------------
19
19
  from dataclasses import dataclass
20
20
  from functools import partial
@@ -37,6 +37,7 @@ from ...schedulers import (
37
37
  )
38
38
  from ...utils import (
39
39
  BaseOutput,
40
+ is_torch_xla_available,
40
41
  logging,
41
42
  replace_example_docstring,
42
43
  )
@@ -46,6 +47,13 @@ from ..pipeline_utils import DiffusionPipeline
46
47
  from .marigold_image_processing import MarigoldImageProcessor
47
48
 
48
49
 
50
+ if is_torch_xla_available():
51
+ import torch_xla.core.xla_model as xm
52
+
53
+ XLA_AVAILABLE = True
54
+ else:
55
+ XLA_AVAILABLE = False
56
+
49
57
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name
50
58
 
51
59
 
@@ -56,7 +64,7 @@ Examples:
56
64
  >>> import torch
57
65
 
58
66
  >>> pipe = diffusers.MarigoldDepthPipeline.from_pretrained(
59
- ... "prs-eth/marigold-depth-lcm-v1-0", variant="fp16", torch_dtype=torch.float16
67
+ ... "prs-eth/marigold-depth-v1-1", variant="fp16", torch_dtype=torch.float16
60
68
  ... ).to("cuda")
61
69
 
62
70
  >>> image = diffusers.utils.load_image("https://marigoldmonodepth.github.io/images/einstein.jpg")
@@ -78,11 +86,12 @@ class MarigoldDepthOutput(BaseOutput):
78
86
 
79
87
  Args:
80
88
  prediction (`np.ndarray`, `torch.Tensor`):
81
- Predicted depth maps with values in the range [0, 1]. The shape is always $numimages \times 1 \times height
82
- \times width$, regardless of whether the images were passed as a 4D array or a list.
89
+ Predicted depth maps with values in the range [0, 1]. The shape is $numimages \times 1 \times height \times
90
+ width$ for `torch.Tensor` or $numimages \times height \times width \times 1$ for `np.ndarray`.
83
91
  uncertainty (`None`, `np.ndarray`, `torch.Tensor`):
84
92
  Uncertainty maps computed from the ensemble, with values in the range [0, 1]. The shape is $numimages
85
- \times 1 \times height \times width$.
93
+ \times 1 \times height \times width$ for `torch.Tensor` or $numimages \times height \times width \times 1$
94
+ for `np.ndarray`.
86
95
  latent (`None`, `torch.Tensor`):
87
96
  Latent features corresponding to the predictions, compatible with the `latents` argument of the pipeline.
88
97
  The shape is $numimages * numensemble \times 4 \times latentheight \times latentwidth$.
@@ -174,7 +183,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
174
183
  default_processing_resolution=default_processing_resolution,
175
184
  )
176
185
 
177
- self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
186
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
178
187
 
179
188
  self.scale_invariant = scale_invariant
180
189
  self.shift_invariant = shift_invariant
@@ -200,6 +209,11 @@ class MarigoldDepthPipeline(DiffusionPipeline):
200
209
  output_type: str,
201
210
  output_uncertainty: bool,
202
211
  ) -> int:
212
+ actual_vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
213
+ if actual_vae_scale_factor != self.vae_scale_factor:
214
+ raise ValueError(
215
+ f"`vae_scale_factor` computed at initialization ({self.vae_scale_factor}) differs from the actual one ({actual_vae_scale_factor})."
216
+ )
203
217
  if num_inference_steps is None:
204
218
  raise ValueError("`num_inference_steps` is not specified and could not be resolved from the model config.")
205
219
  if num_inference_steps < 1:
@@ -312,6 +326,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
312
326
 
313
327
  return num_images
314
328
 
329
+ @torch.compiler.disable
315
330
  def progress_bar(self, iterable=None, total=None, desc=None, leave=True):
316
331
  if not hasattr(self, "_progress_bar_config"):
317
332
  self._progress_bar_config = {}
@@ -362,11 +377,9 @@ class MarigoldDepthPipeline(DiffusionPipeline):
362
377
  same width and height.
363
378
  num_inference_steps (`int`, *optional*, defaults to `None`):
364
379
  Number of denoising diffusion steps during inference. The default value `None` results in automatic
365
- selection. The number of steps should be at least 10 with the full Marigold models, and between 1 and 4
366
- for Marigold-LCM models.
380
+ selection.
367
381
  ensemble_size (`int`, defaults to `1`):
368
- Number of ensemble predictions. Recommended values are 5 and higher for better precision, or 1 for
369
- faster inference.
382
+ Number of ensemble predictions. Higher values result in measurable improvements and visual degradation.
370
383
  processing_resolution (`int`, *optional*, defaults to `None`):
371
384
  Effective processing resolution. When set to `0`, matches the larger input image dimension. This
372
385
  produces crisper predictions, but may also lead to the overall loss of global context. The default
@@ -478,9 +491,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
478
491
  # `pred_latent` variable. The variable `image_latent` is of the same shape: it contains each input image encoded
479
492
  # into latent space and replicated `E` times. The latents can be either generated (see `generator` to ensure
480
493
  # reproducibility), or passed explicitly via the `latents` argument. The latter can be set outside the pipeline
481
- # code. For example, in the Marigold-LCM video processing demo, the latents initialization of a frame is taken
482
- # as a convex combination of the latents output of the pipeline for the previous frame and a newly-sampled
483
- # noise. This behavior can be achieved by setting the `output_latent` argument to `True`. The latent space
494
+ # code. This behavior can be achieved by setting the `output_latent` argument to `True`. The latent space
484
495
  # dimensions are `(h, w)`. Encoding into latent space happens in batches of size `batch_size`.
485
496
  # Model invocation: self.vae.encoder.
486
497
  image_latent, pred_latent = self.prepare_latents(
@@ -517,6 +528,9 @@ class MarigoldDepthPipeline(DiffusionPipeline):
517
528
  noise, t, batch_pred_latent, generator=generator
518
529
  ).prev_sample # [B,4,h,w]
519
530
 
531
+ if XLA_AVAILABLE:
532
+ xm.mark_step()
533
+
520
534
  pred_latents.append(batch_pred_latent)
521
535
 
522
536
  pred_latent = torch.cat(pred_latents, dim=0) # [N*E,4,h,w]
@@ -722,6 +736,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
722
736
  param = init_s.cpu().numpy()
723
737
  else:
724
738
  raise ValueError("Unrecognized alignment.")
739
+ param = param.astype(np.float64)
725
740
 
726
741
  return param
727
742
 
@@ -764,7 +779,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
764
779
 
765
780
  if regularizer_strength > 0:
766
781
  prediction, _ = ensemble(depth_aligned, return_uncertainty=False)
767
- err_near = (0.0 - prediction.min()).abs().item()
782
+ err_near = prediction.min().abs().item()
768
783
  err_far = (1.0 - prediction.max()).abs().item()
769
784
  cost += (err_near + err_far) * regularizer_strength
770
785