diffusers 0.33.0-py3-none-any.whl → 0.34.0-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. The information is provided for informational purposes only.
Files changed (478)
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +17 -12
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. diffusers-0.33.0.dist-info/RECORD +0 -608
  475. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  476. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
  477. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0

diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py

@@ -177,7 +177,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
  r"""
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ Flawed](https://huggingface.co/papers/2305.08891).

  Args:
  noise_cfg (`torch.Tensor`):
@@ -515,7 +515,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
  # and should be between [0, 1]

  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -653,7 +653,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
  return self._guidance_rescale

  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  @property
  def do_classifier_free_guidance(self):
@@ -956,7 +956,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

  if self.do_classifier_free_guidance and guidance_rescale > 0.0:
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)

  # compute the previous noisy sample x_t -> x_t-1
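
For context, the `rescale_noise_cfg` helper applied above performs the standard-deviation rescaling described in Section 3.4 of the paper linked in these hunks. A sketch of that operation (a plausible reconstruction of the helper defined earlier in these files, not copied from this diff):

    import torch

    def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
        # Rescale the CFG output toward the std of the text-conditioned prediction
        # (to counter overexposure), then blend with the original CFG output.
        std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
        std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
        noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
        return guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
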

diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py

@@ -199,7 +199,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
  r"""
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ Flawed](https://huggingface.co/papers/2305.08891).

  Args:
  noise_cfg (`torch.Tensor`):
@@ -557,7 +557,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
  # and should be between [0, 1]

  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -771,7 +771,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
  return self._guidance_rescale

  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  @property
  def do_classifier_free_guidance(self):
@@ -849,7 +849,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  eta (`float`, *optional*, defaults to 0.0):
- A parameter defined in the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies to the
+ A parameter defined in the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only applies to the
  [`~schedulers.DDIMScheduler`] and is ignored in other schedulers. It adjusts noise level during the
  inference process.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -883,7 +883,8 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
  inputs will be passed, facilitating enhanced logging or monitoring of the generation process.
  guidance_rescale (`float`, *optional*, defaults to 0.0):
  Rescale parameter for adjusting noise configuration based on guidance rescale. Based on findings from
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ [Common Diffusion Noise Schedules and Sample Steps are
+ Flawed](https://huggingface.co/papers/2305.08891).
  strength (`float`, *optional*, defaults to 1.0):
  Affects the overall styling or quality of the generated output. Values closer to 1 usually provide
  direct adherence to prompts.
@@ -1180,7 +1181,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

  if self.do_classifier_free_guidance and guidance_rescale > 0.0:
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)

  # compute the previous noisy sample x_t -> x_t-1

diffusers/pipelines/flux/modeling_flux.py

@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.

diffusers/pipelines/flux/pipeline_flux.py

@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -490,14 +490,6 @@ class FluxPipeline(
  f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
  )

- if prompt_embeds is not None and negative_prompt_embeds is not None:
- if prompt_embeds.shape != negative_prompt_embeds.shape:
- raise ValueError(
- "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
- f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
- f" {negative_prompt_embeds.shape}."
- )
-
  if prompt_embeds is not None and pooled_prompt_embeds is None:
  raise ValueError(
  "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
@@ -695,11 +687,11 @@ class FluxPipeline(
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
  will be used.
  guidance_scale (`float`, *optional*, defaults to 3.5):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -708,7 +700,7 @@ class FluxPipeline(
  latents (`torch.FloatTensor`, *optional*):
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
- tensor will ge generated by sampling using the supplied random `generator`.
+ tensor will be generated by sampling using the supplied random `generator`.
  prompt_embeds (`torch.FloatTensor`, *optional*):
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
  provided, text embeddings will be generated from `prompt` input argument.
@@ -821,7 +813,7 @@ class FluxPipeline(
  (
  negative_prompt_embeds,
  negative_pooled_prompt_embeds,
- _,
+ negative_text_ids,
  ) = self.encode_prompt(
  prompt=negative_prompt,
  prompt_2=negative_prompt_2,
@@ -906,6 +898,7 @@ class FluxPipeline(
  )

  # 6. Denoising loop
+ self.scheduler.set_begin_index(0)
  with self.progress_bar(total=num_inference_steps) as progress_bar:
  for i, t in enumerate(timesteps):
  if self.interrupt:
@@ -938,7 +931,7 @@ class FluxPipeline(
  guidance=guidance,
  pooled_projections=negative_pooled_prompt_embeds,
  encoder_hidden_states=negative_prompt_embeds,
- txt_ids=text_ids,
+ txt_ids=negative_text_ids,
  img_ids=latent_image_ids,
  joint_attention_kwargs=self.joint_attention_kwargs,
  return_dict=False,
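
The last three hunks matter for runs that pass a negative prompt: the negative branch now keeps its own `negative_text_ids` and feeds them to the second transformer pass instead of reusing the positive `text_ids`, and the scheduler's begin index is reset before the denoising loop. A usage sketch that exercises this path (the model id, dtype, and the `true_cfg_scale` argument are illustrative assumptions, not taken from this diff):

    import torch
    from diffusers import FluxPipeline

    pipe = FluxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev",  # illustrative checkpoint
        torch_dtype=torch.bfloat16,
    )
    pipe.enable_model_cpu_offload()

    image = pipe(
        prompt="a watercolor painting of a lighthouse at dusk",
        negative_prompt="blurry, low quality",  # triggers the negative encode_prompt branch above
        true_cfg_scale=4.0,                     # assumption: enables the second (negative) transformer pass
        guidance_scale=3.5,
        num_inference_steps=28,
    ).images[0]
    image.save("lighthouse.png")
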

diffusers/pipelines/flux/pipeline_flux_control.py

@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -661,11 +661,11 @@ class FluxControlPipeline(
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
  will be used.
  guidance_scale (`float`, *optional*, defaults to 3.5):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):

diffusers/pipelines/flux/pipeline_flux_control_img2img.py

@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -699,11 +699,11 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
  will be used.
  guidance_scale (`float`, *optional*, defaults to 7.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):

diffusers/pipelines/flux/pipeline_flux_control_inpaint.py

@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -857,11 +857,11 @@ class FluxControlInpaintPipeline(
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
  will be used.
  guidance_scale (`float`, *optional*, defaults to 7.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):

diffusers/pipelines/flux/pipeline_flux_controlnet.py

@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -733,11 +733,11 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
  will be used.
  guidance_scale (`float`, *optional*, defaults to 7.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
  The percentage of total steps at which the ControlNet starts applying.
  control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):

diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py

@@ -687,7 +687,8 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
  will be used.
  guidance_scale (`float`, *optional*, defaults to 7.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598).
  control_mode (`int` or `List[int]`, *optional*):
  The mode for the ControlNet. If multiple ControlNets are used, this should be a list.
  controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
@@ -800,17 +801,20 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
  )
  height, width = control_image.shape[-2:]

- control_image = retrieve_latents(self.vae.encode(control_image), generator=generator)
- control_image = (control_image - self.vae.config.shift_factor) * self.vae.config.scaling_factor
+ # xlab controlnet has a input_hint_block and instantx controlnet does not
+ controlnet_blocks_repeat = False if self.controlnet.input_hint_block is None else True
+ if self.controlnet.input_hint_block is None:
+ control_image = retrieve_latents(self.vae.encode(control_image), generator=generator)
+ control_image = (control_image - self.vae.config.shift_factor) * self.vae.config.scaling_factor

- height_control_image, width_control_image = control_image.shape[2:]
- control_image = self._pack_latents(
- control_image,
- batch_size * num_images_per_prompt,
- num_channels_latents,
- height_control_image,
- width_control_image,
- )
+ height_control_image, width_control_image = control_image.shape[2:]
+ control_image = self._pack_latents(
+ control_image,
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height_control_image,
+ width_control_image,
+ )

  if control_mode is not None:
  control_mode = torch.tensor(control_mode).to(device, dtype=torch.long)
@@ -819,7 +823,9 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
  elif isinstance(self.controlnet, FluxMultiControlNetModel):
  control_images = []

- for control_image_ in control_image:
+ # xlab controlnet has a input_hint_block and instantx controlnet does not
+ controlnet_blocks_repeat = False if self.controlnet.nets[0].input_hint_block is None else True
+ for i, control_image_ in enumerate(control_image):
  control_image_ = self.prepare_image(
  image=control_image_,
  width=width,
@@ -831,17 +837,18 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
  )
  height, width = control_image_.shape[-2:]

- control_image_ = retrieve_latents(self.vae.encode(control_image_), generator=generator)
- control_image_ = (control_image_ - self.vae.config.shift_factor) * self.vae.config.scaling_factor
+ if self.controlnet.nets[0].input_hint_block is None:
+ control_image_ = retrieve_latents(self.vae.encode(control_image_), generator=generator)
+ control_image_ = (control_image_ - self.vae.config.shift_factor) * self.vae.config.scaling_factor

- height_control_image, width_control_image = control_image_.shape[2:]
- control_image_ = self._pack_latents(
- control_image_,
- batch_size * num_images_per_prompt,
- num_channels_latents,
- height_control_image,
- width_control_image,
- )
+ height_control_image, width_control_image = control_image_.shape[2:]
+ control_image_ = self._pack_latents(
+ control_image_,
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height_control_image,
+ width_control_image,
+ )

  control_images.append(control_image_)

@@ -955,6 +962,7 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
  img_ids=latent_image_ids,
  joint_attention_kwargs=self.joint_attention_kwargs,
  return_dict=False,
+ controlnet_blocks_repeat=controlnet_blocks_repeat,
  )[0]

  latents_dtype = latents.dtype
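
The hunks above branch on whether the loaded ControlNet has an `input_hint_block`: per the new comment, XLabs-style ControlNets do (so the control image skips the VAE encode-and-pack step and `controlnet_blocks_repeat` is set to `True`), while InstantX-style ControlNets do not (their control images are VAE-encoded and packed into latents, as before). A hedged end-to-end sketch of driving this pipeline (repository ids, the example URLs, and the exact call parameters are illustrative assumptions, not taken from this diff):

    import torch
    from diffusers import FluxControlNetImg2ImgPipeline, FluxControlNetModel
    from diffusers.utils import load_image

    # Illustrative InstantX-style checkpoint: it has no input_hint_block, so the
    # pipeline VAE-encodes and packs the control image as shown in the hunks above.
    controlnet = FluxControlNetModel.from_pretrained(
        "InstantX/FLUX.1-dev-Controlnet-Canny", torch_dtype=torch.bfloat16
    )
    pipe = FluxControlNetImg2ImgPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev", controlnet=controlnet, torch_dtype=torch.bfloat16
    )
    pipe.enable_model_cpu_offload()

    init_image = load_image("https://example.com/init.png")      # placeholder URL
    control_image = load_image("https://example.com/canny.png")  # placeholder URL

    image = pipe(
        prompt="a futuristic city street at night",
        image=init_image,
        control_image=control_image,
        strength=0.7,
        controlnet_conditioning_scale=0.6,
        num_inference_steps=28,
        guidance_scale=3.5,
    ).images[0]
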

diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py

@@ -801,7 +801,8 @@ class FluxControlNetInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
  will be used.
  guidance_scale (`float`, *optional*, defaults to 7.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598).
  control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
  The percentage of total steps at which the ControlNet starts applying.
  control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -794,11 +794,11 @@ class FluxFillPipeline(
      their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
      will be used.
  guidance_scale (`float`, *optional*, defaults to 30.0):
-     Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-     `guidance_scale` is defined as `w` of equation 2. of [Imagen
-     Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-     1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-     usually at the expense of lower image quality.
+     Guidance scale as defined in [Classifier-Free Diffusion
+     Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+     of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+     `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+     the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
      The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
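The `guidance_scale` docstrings rewritten in these hunks all point at the same quantity: the weight `w` from classifier-free guidance. As a reminder of what that weight does, here is a minimal, generic sketch of the guidance combination step. It is illustrative only; the Flux pipelines feed `guidance_scale` to a guidance-distilled transformer rather than combining two predictions at sampling time.

    import torch

    def classifier_free_guidance(noise_pred_uncond, noise_pred_text, guidance_scale):
        # w > 1 pulls the prediction toward the text-conditional branch,
        # improving prompt adherence at the cost of some diversity/image quality.
        return noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

    uncond = torch.zeros(1, 4, 8, 8)
    cond = torch.ones(1, 4, 8, 8)
    guided = classifier_free_guidance(uncond, cond, guidance_scale=7.0)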
@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -607,6 +607,39 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile

      return latents

+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_slicing
+ def enable_vae_slicing(self):
+     r"""
+     Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+     compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+     """
+     self.vae.enable_slicing()
+
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_slicing
+ def disable_vae_slicing(self):
+     r"""
+     Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+     computing decoding in one step.
+     """
+     self.vae.disable_slicing()
+
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_tiling
+ def enable_vae_tiling(self):
+     r"""
+     Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
+     compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
+     processing larger images.
+     """
+     self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
+ def disable_vae_tiling(self):
+     r"""
+     Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
+     computing decoding in one step.
+     """
+     self.vae.disable_tiling()
+
  def prepare_latents(
      self,
      image,
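The four methods added in the hunk above forward directly to the VAE's slicing and tiling switches, so memory-friendly decoding can now be toggled from `FluxImg2ImgPipeline` itself. A minimal usage sketch follows; the checkpoint id and dtype are illustrative.

    import torch
    from diffusers import FluxImg2ImgPipeline

    pipe = FluxImg2ImgPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)

    # Decode the batch slice-by-slice and tile large images to reduce peak VAE memory.
    pipe.enable_vae_slicing()
    pipe.enable_vae_tiling()

    # ... run inference, then restore single-pass decoding if preferred.
    pipe.disable_vae_tiling()
    pipe.disable_vae_slicing()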
@@ -741,11 +774,11 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
      their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
      will be used.
  guidance_scale (`float`, *optional*, defaults to 7.0):
-     Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-     `guidance_scale` is defined as `w` of equation 2. of [Imagen
-     Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-     1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-     usually at the expense of lower image quality.
+     Guidance scale as defined in [Classifier-Free Diffusion
+     Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+     of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+     `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+     the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
      The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -860,11 +860,11 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FluxIPAdapterM
      their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
      will be used.
  guidance_scale (`float`, *optional*, defaults to 7.0):
-     Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-     `guidance_scale` is defined as `w` of equation 2. of [Imagen
-     Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-     1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-     usually at the expense of lower image quality.
+     Guidance scale as defined in [Classifier-Free Diffusion
+     Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+     of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+     `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+     the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
      The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -1193,6 +1193,11 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FluxIPAdapterM
      image = self.vae.decode(latents, return_dict=False)[0]
      image = self.image_processor.postprocess(image, output_type=output_type)

+     if padding_mask_crop is not None:
+         image = [
+             self.image_processor.apply_overlay(mask_image, original_image, i, crops_coords) for i in image
+         ]
+
      # Offload all models
      self.maybe_free_model_hooks()
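The hunk above makes `FluxInpaintPipeline` paste the decoded result back onto the untouched original when `padding_mask_crop` is set, matching the other inpaint pipelines. A hedged usage sketch, where the checkpoint id and image URLs are placeholders:

    import torch
    from diffusers import FluxInpaintPipeline
    from diffusers.utils import load_image

    pipe = FluxInpaintPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)

    image = load_image("https://example.com/photo.png")      # placeholder URL
    mask = load_image("https://example.com/photo_mask.png")  # placeholder URL

    # With padding_mask_crop set, only a crop around the mask is denoised and the
    # output is overlaid onto the original image at the end of __call__.
    result = pipe(prompt="a red brick wall", image=image, mask_image=mask, padding_mask_crop=32).images[0]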
 
@@ -1,4 +1,4 @@
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -33,7 +33,7 @@ class FreeInitMixin:
      spatial_stop_frequency: float = 0.25,
      temporal_stop_frequency: float = 0.25,
  ):
-     """Enables the FreeInit mechanism as in https://arxiv.org/abs/2312.07537.
+     """Enables the FreeInit mechanism as in https://huggingface.co/papers/2312.07537.

      This implementation has been adapted from the [official repository](https://github.com/TianxingWu/FreeInit).
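For context, the two stop-frequency arguments visible in this hunk belong to the mixin's public `enable_free_init` method. A hedged usage sketch on an AnimateDiff-style pipeline; the checkpoint ids are illustrative and the remaining keyword defaults are assumed from the surrounding signature.

    import torch
    from diffusers import AnimateDiffPipeline, MotionAdapter

    adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
    pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)

    # Re-initialize low-frequency noise components over a few extra sampling passes (FreeInit).
    pipe.enable_free_init(num_iters=3, spatial_stop_frequency=0.25, temporal_stop_frequency=0.25)
    frames = pipe(prompt="a panda surfing", num_frames=16).frames[0]
    pipe.disable_free_init()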
 
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -143,7 +143,7 @@ class SplitInferenceModule(nn.Module):


  class AnimateDiffFreeNoiseMixin:
-     r"""Mixin class for [FreeNoise](https://arxiv.org/abs/2310.15169)."""
+     r"""Mixin class for [FreeNoise](https://huggingface.co/papers/2310.15169)."""

      def _enable_free_noise_in_block(self, block: Union[CrossAttnDownBlockMotion, DownBlockMotion, UpBlockMotion]):
          r"""Helper function to enable FreeNoise in transformer blocks."""
@@ -478,7 +478,7 @@ class AnimateDiffFreeNoiseMixin:
          Must be one of ["shuffle_context", "repeat_context", "random"].
          - "shuffle_context"
              Shuffles a fixed batch of `context_length` latents to create a final latent of size
-             `num_frames`. This is usually the best setting for most generation scenarious. However, there
+             `num_frames`. This is usually the best setting for most generation scenarios. However, there
              might be visible repetition noticeable in the kinds of motion/animation generated.
          - "repeated_context"
              Repeats a fixed batch of `context_length` latents to create a final latent of size
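The docstring above lists the `noise_type` options the FreeNoise mixin accepts. Assuming the same AnimateDiff-style pipeline as in the earlier FreeInit sketch, enabling it looks roughly like this; the argument names are assumptions based on the mixin's documented parameters, not verified against this diff.

    # FreeNoise: generate longer videos by re-using a sliding window of context latents.
    pipe.enable_free_noise(context_length=16, context_stride=4, noise_type="shuffle_context")
    long_video = pipe(prompt="a panda surfing", num_frames=64).frames[0]
    pipe.disable_free_noise()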
@@ -0,0 +1,47 @@
+ from typing import TYPE_CHECKING
+
+ from ...utils import (
+     DIFFUSERS_SLOW_IMPORT,
+     OptionalDependencyNotAvailable,
+     _LazyModule,
+     get_objects_from_module,
+     is_torch_available,
+     is_transformers_available,
+ )
+
+
+ _dummy_objects = {}
+ _additional_imports = {}
+ _import_structure = {"pipeline_output": ["HiDreamImagePipelineOutput"]}
+
+ try:
+     if not (is_transformers_available() and is_torch_available()):
+         raise OptionalDependencyNotAvailable()
+ except OptionalDependencyNotAvailable:
+     from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+ else:
+     _import_structure["pipeline_hidream_image"] = ["HiDreamImagePipeline"]
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+     try:
+         if not (is_transformers_available() and is_torch_available()):
+             raise OptionalDependencyNotAvailable()
+     except OptionalDependencyNotAvailable:
+         from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+     else:
+         from .pipeline_hidream_image import HiDreamImagePipeline
+ else:
+     import sys
+
+     sys.modules[__name__] = _LazyModule(
+         __name__,
+         globals()["__file__"],
+         _import_structure,
+         module_spec=__spec__,
+     )
+
+     for name, value in _dummy_objects.items():
+         setattr(sys.modules[__name__], name, value)
+     for name, value in _additional_imports.items():
+         setattr(sys.modules[__name__], name, value)
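This new `__init__.py` follows the lazy-import convention used throughout diffusers: the module body only registers an import structure, and `_LazyModule` resolves the pipeline class on first attribute access, so importing the package stays cheap. In practice, assuming torch and transformers are installed and with the subpackage path inferred from this file's relative imports:

    # The heavy pipeline module is only materialized when the attribute is first touched.
    from diffusers.pipelines.hidream_image import HiDreamImagePipeline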