diffusers-0.32.2-py3-none-any.whl → diffusers-0.33.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,20 @@
1
+ from dataclasses import dataclass
2
+
3
+ import torch
4
+
5
+ from diffusers.utils import BaseOutput
6
+
7
+
8
+ @dataclass
9
+ class EasyAnimatePipelineOutput(BaseOutput):
10
+ r"""
11
+ Output class for EasyAnimate pipelines.
12
+
13
+ Args:
14
+ frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
15
+ List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing
16
+ denoised PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
17
+ `(batch_size, num_frames, channels, height, width)`.
18
+ """
19
+
20
+ frames: torch.Tensor
@@ -28,8 +28,7 @@ from transformers import (
28
28
 
29
29
  from ...image_processor import PipelineImageInput, VaeImageProcessor
30
30
  from ...loaders import FluxIPAdapterMixin, FluxLoraLoaderMixin, FromSingleFileMixin, TextualInversionLoaderMixin
31
- from ...models.autoencoders import AutoencoderKL
32
- from ...models.transformers import FluxTransformer2DModel
31
+ from ...models import AutoencoderKL, FluxTransformer2DModel
33
32
  from ...schedulers import FlowMatchEulerDiscreteScheduler
34
33
  from ...utils import (
35
34
  USE_PEFT_BACKEND,
@@ -76,7 +75,7 @@ def calculate_shift(
76
75
  base_seq_len: int = 256,
77
76
  max_seq_len: int = 4096,
78
77
  base_shift: float = 0.5,
79
- max_shift: float = 1.16,
78
+ max_shift: float = 1.15,
80
79
  ):
81
80
  m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
82
81
  b = base_shift - m * base_seq_len
@@ -206,9 +205,7 @@ class FluxPipeline(
206
205
  image_encoder=image_encoder,
207
206
  feature_extractor=feature_extractor,
208
207
  )
209
- self.vae_scale_factor = (
210
- 2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
211
- )
208
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
212
209
  # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
213
210
  # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
214
211
  self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
@@ -408,23 +405,28 @@ class FluxPipeline(
408
405
  if not isinstance(ip_adapter_image, list):
409
406
  ip_adapter_image = [ip_adapter_image]
410
407
 
411
- if len(ip_adapter_image) != len(self.transformer.encoder_hid_proj.image_projection_layers):
408
+ if len(ip_adapter_image) != self.transformer.encoder_hid_proj.num_ip_adapters:
412
409
  raise ValueError(
413
- f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.transformer.encoder_hid_proj.image_projection_layers)} IP Adapters."
410
+ f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {self.transformer.encoder_hid_proj.num_ip_adapters} IP Adapters."
414
411
  )
415
412
 
416
- for single_ip_adapter_image, image_proj_layer in zip(
417
- ip_adapter_image, self.transformer.encoder_hid_proj.image_projection_layers
418
- ):
413
+ for single_ip_adapter_image in ip_adapter_image:
419
414
  single_image_embeds = self.encode_image(single_ip_adapter_image, device, 1)
420
-
421
415
  image_embeds.append(single_image_embeds[None, :])
422
416
  else:
417
+ if not isinstance(ip_adapter_image_embeds, list):
418
+ ip_adapter_image_embeds = [ip_adapter_image_embeds]
419
+
420
+ if len(ip_adapter_image_embeds) != self.transformer.encoder_hid_proj.num_ip_adapters:
421
+ raise ValueError(
422
+ f"`ip_adapter_image_embeds` must have same length as the number of IP Adapters. Got {len(ip_adapter_image_embeds)} image embeds and {self.transformer.encoder_hid_proj.num_ip_adapters} IP Adapters."
423
+ )
424
+
423
425
  for single_image_embeds in ip_adapter_image_embeds:
424
426
  image_embeds.append(single_image_embeds)
425
427
 
426
428
  ip_adapter_image_embeds = []
427
- for i, single_image_embeds in enumerate(image_embeds):
429
+ for single_image_embeds in image_embeds:
428
430
  single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
429
431
  single_image_embeds = single_image_embeds.to(device=device)
430
432
  ip_adapter_image_embeds.append(single_image_embeds)
@@ -622,6 +624,10 @@ class FluxPipeline(
622
624
  def num_timesteps(self):
623
625
  return self._num_timesteps
624
626
 
627
+ @property
628
+ def current_timestep(self):
629
+ return self._current_timestep
630
+
625
631
  @property
626
632
  def interrupt(self):
627
633
  return self._interrupt
@@ -667,7 +673,16 @@ class FluxPipeline(
667
673
  instead.
668
674
  prompt_2 (`str` or `List[str]`, *optional*):
669
675
  The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
670
- will be used instead
676
+ will be used instead.
677
+ negative_prompt (`str` or `List[str]`, *optional*):
678
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
679
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is
680
+ not greater than `1`).
681
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
682
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
683
+ `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders.
684
+ true_cfg_scale (`float`, *optional*, defaults to 1.0):
685
+ When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
671
686
  height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
672
687
  The height in pixels of the generated image. This is set to 1024 by default for the best results.
673
688
  width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -679,7 +694,7 @@ class FluxPipeline(
679
694
  Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
680
695
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
681
696
  will be used.
682
- guidance_scale (`float`, *optional*, defaults to 7.0):
697
+ guidance_scale (`float`, *optional*, defaults to 3.5):
683
698
  Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
684
699
  `guidance_scale` is defined as `w` of equation 2. of [Imagen
685
700
  Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
@@ -711,6 +726,14 @@ class FluxPipeline(
711
726
  Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
712
727
  IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. If not
713
728
  provided, embeddings are computed from the `ip_adapter_image` input argument.
729
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
730
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
731
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
732
+ argument.
733
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
734
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
735
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
736
+ input argument.
714
737
  output_type (`str`, *optional*, defaults to `"pil"`):
715
738
  The output format of the generate image. Choose between
716
739
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -760,6 +783,7 @@ class FluxPipeline(
760
783
 
761
784
  self._guidance_scale = guidance_scale
762
785
  self._joint_attention_kwargs = joint_attention_kwargs
786
+ self._current_timestep = None
763
787
  self._interrupt = False
764
788
 
765
789
  # 2. Define call parameters
@@ -775,7 +799,10 @@ class FluxPipeline(
775
799
  lora_scale = (
776
800
  self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
777
801
  )
778
- do_true_cfg = true_cfg_scale > 1 and negative_prompt is not None
802
+ has_neg_prompt = negative_prompt is not None or (
803
+ negative_prompt_embeds is not None and negative_pooled_prompt_embeds is not None
804
+ )
805
+ do_true_cfg = true_cfg_scale > 1 and has_neg_prompt
779
806
  (
780
807
  prompt_embeds,
781
808
  pooled_prompt_embeds,
@@ -824,10 +851,10 @@ class FluxPipeline(
824
851
  image_seq_len = latents.shape[1]
825
852
  mu = calculate_shift(
826
853
  image_seq_len,
827
- self.scheduler.config.base_image_seq_len,
828
- self.scheduler.config.max_image_seq_len,
829
- self.scheduler.config.base_shift,
830
- self.scheduler.config.max_shift,
854
+ self.scheduler.config.get("base_image_seq_len", 256),
855
+ self.scheduler.config.get("max_image_seq_len", 4096),
856
+ self.scheduler.config.get("base_shift", 0.5),
857
+ self.scheduler.config.get("max_shift", 1.15),
831
858
  )
832
859
  timesteps, num_inference_steps = retrieve_timesteps(
833
860
  self.scheduler,
@@ -850,10 +877,13 @@ class FluxPipeline(
850
877
  negative_ip_adapter_image is None and negative_ip_adapter_image_embeds is None
851
878
  ):
852
879
  negative_ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
880
+ negative_ip_adapter_image = [negative_ip_adapter_image] * self.transformer.encoder_hid_proj.num_ip_adapters
881
+
853
882
  elif (ip_adapter_image is None and ip_adapter_image_embeds is None) and (
854
883
  negative_ip_adapter_image is not None or negative_ip_adapter_image_embeds is not None
855
884
  ):
856
885
  ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
886
+ ip_adapter_image = [ip_adapter_image] * self.transformer.encoder_hid_proj.num_ip_adapters
857
887
 
858
888
  if self.joint_attention_kwargs is None:
859
889
  self._joint_attention_kwargs = {}
@@ -881,6 +911,7 @@ class FluxPipeline(
881
911
  if self.interrupt:
882
912
  continue
883
913
 
914
+ self._current_timestep = t
884
915
  if image_embeds is not None:
885
916
  self._joint_attention_kwargs["ip_adapter_image_embeds"] = image_embeds
886
917
  # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
@@ -939,9 +970,10 @@ class FluxPipeline(
939
970
  if XLA_AVAILABLE:
940
971
  xm.mark_step()
941
972
 
973
+ self._current_timestep = None
974
+
942
975
  if output_type == "latent":
943
976
  image = latents
944
-
945
977
  else:
946
978
  latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
947
979
  latents = (latents / self.vae.config.scaling_factor) + self.vae.config.shift_factor
@@ -82,12 +82,13 @@ EXAMPLE_DOC_STRING = """
82
82
  """
83
83
 
84
84
 
85
+ # Copied from diffusers.pipelines.flux.pipeline_flux.calculate_shift
85
86
  def calculate_shift(
86
87
  image_seq_len,
87
88
  base_seq_len: int = 256,
88
89
  max_seq_len: int = 4096,
89
90
  base_shift: float = 0.5,
90
- max_shift: float = 1.16,
91
+ max_shift: float = 1.15,
91
92
  ):
92
93
  m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
93
94
  b = base_shift - m * base_seq_len
@@ -212,12 +213,8 @@ class FluxControlPipeline(
212
213
  transformer=transformer,
213
214
  scheduler=scheduler,
214
215
  )
215
- self.vae_scale_factor = (
216
- 2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
217
- )
218
- self.vae_latent_channels = (
219
- self.vae.config.latent_channels if hasattr(self, "vae") and self.vae is not None else 16
220
- )
216
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
217
+ self.vae_latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
221
218
  # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
222
219
  # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
223
220
  self.image_processor = VaeImageProcessor(
@@ -663,7 +660,7 @@ class FluxControlPipeline(
663
660
  Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
664
661
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
665
662
  will be used.
666
- guidance_scale (`float`, *optional*, defaults to 7.0):
663
+ guidance_scale (`float`, *optional*, defaults to 3.5):
667
664
  Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
668
665
  `guidance_scale` is defined as `w` of equation 2. of [Imagen
669
666
  Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
@@ -802,10 +799,10 @@ class FluxControlPipeline(
802
799
  image_seq_len = latents.shape[1]
803
800
  mu = calculate_shift(
804
801
  image_seq_len,
805
- self.scheduler.config.base_image_seq_len,
806
- self.scheduler.config.max_image_seq_len,
807
- self.scheduler.config.base_shift,
808
- self.scheduler.config.max_shift,
802
+ self.scheduler.config.get("base_image_seq_len", 256),
803
+ self.scheduler.config.get("max_image_seq_len", 4096),
804
+ self.scheduler.config.get("base_shift", 0.5),
805
+ self.scheduler.config.get("max_shift", 1.15),
809
806
  )
810
807
  timesteps, num_inference_steps = retrieve_timesteps(
811
808
  self.scheduler,
@@ -93,7 +93,7 @@ def calculate_shift(
93
93
  base_seq_len: int = 256,
94
94
  max_seq_len: int = 4096,
95
95
  base_shift: float = 0.5,
96
- max_shift: float = 1.16,
96
+ max_shift: float = 1.15,
97
97
  ):
98
98
  m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
99
99
  b = base_shift - m * base_seq_len
@@ -227,9 +227,7 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
227
227
  transformer=transformer,
228
228
  scheduler=scheduler,
229
229
  )
230
- self.vae_scale_factor = (
231
- 2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
232
- )
230
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
233
231
  # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
234
232
  # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
235
233
  self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
@@ -440,7 +438,6 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
440
438
 
441
439
  return timesteps, num_inference_steps - t_start
442
440
 
443
- # Copied from diffusers.pipelines.flux.pipeline_flux_img2img.FluxImg2ImgPipeline.check_inputs
444
441
  def check_inputs(
445
442
  self,
446
443
  prompt,
@@ -536,7 +533,6 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
536
533
 
537
534
  return latents
538
535
 
539
- # Copied from diffusers.pipelines.flux.pipeline_flux_img2img.FluxImg2ImgPipeline.prepare_latents
540
536
  def prepare_latents(
541
537
  self,
542
538
  image,
@@ -809,10 +805,10 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
809
805
  image_seq_len = (int(height) // self.vae_scale_factor // 2) * (int(width) // self.vae_scale_factor // 2)
810
806
  mu = calculate_shift(
811
807
  image_seq_len,
812
- self.scheduler.config.base_image_seq_len,
813
- self.scheduler.config.max_image_seq_len,
814
- self.scheduler.config.base_shift,
815
- self.scheduler.config.max_shift,
808
+ self.scheduler.config.get("base_image_seq_len", 256),
809
+ self.scheduler.config.get("max_image_seq_len", 4096),
810
+ self.scheduler.config.get("base_shift", 0.5),
811
+ self.scheduler.config.get("max_shift", 1.15),
816
812
  )
817
813
  timesteps, num_inference_steps = retrieve_timesteps(
818
814
  self.scheduler,
@@ -119,7 +119,7 @@ def calculate_shift(
119
119
  base_seq_len: int = 256,
120
120
  max_seq_len: int = 4096,
121
121
  base_shift: float = 0.5,
122
- max_shift: float = 1.16,
122
+ max_shift: float = 1.15,
123
123
  ):
124
124
  m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
125
125
  b = base_shift - m * base_seq_len
@@ -258,15 +258,14 @@ class FluxControlInpaintPipeline(
258
258
  transformer=transformer,
259
259
  scheduler=scheduler,
260
260
  )
261
- self.vae_scale_factor = (
262
- 2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
263
- )
261
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
264
262
  # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
265
263
  # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
266
264
  self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
265
+ latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
267
266
  self.mask_processor = VaeImageProcessor(
268
267
  vae_scale_factor=self.vae_scale_factor * 2,
269
- vae_latent_channels=self.vae.config.latent_channels,
268
+ vae_latent_channels=latent_channels,
270
269
  do_normalize=False,
271
270
  do_binarize=True,
272
271
  do_convert_grayscale=True,
@@ -478,7 +477,6 @@ class FluxControlInpaintPipeline(
478
477
 
479
478
  return timesteps, num_inference_steps - t_start
480
479
 
481
- # Copied from diffusers.pipelines.flux.pipeline_flux_img2img.FluxImg2ImgPipeline.check_inputs
482
480
  def check_inputs(
483
481
  self,
484
482
  prompt,
@@ -985,10 +983,10 @@ class FluxControlInpaintPipeline(
985
983
  image_seq_len = (int(height) // self.vae_scale_factor // 2) * (int(width) // self.vae_scale_factor // 2)
986
984
  mu = calculate_shift(
987
985
  image_seq_len,
988
- self.scheduler.config.base_image_seq_len,
989
- self.scheduler.config.max_image_seq_len,
990
- self.scheduler.config.base_shift,
991
- self.scheduler.config.max_shift,
986
+ self.scheduler.config.get("base_image_seq_len", 256),
987
+ self.scheduler.config.get("max_image_seq_len", 4096),
988
+ self.scheduler.config.get("base_shift", 0.5),
989
+ self.scheduler.config.get("max_shift", 1.15),
992
990
  )
993
991
  timesteps, num_inference_steps = retrieve_timesteps(
994
992
  self.scheduler,