diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (551):
  1. diffusers/__init__.py +145 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/__init__.py +1 -1
  4. diffusers/commands/custom_blocks.py +134 -0
  5. diffusers/commands/diffusers_cli.py +3 -1
  6. diffusers/commands/env.py +1 -1
  7. diffusers/commands/fp16_safetensors.py +2 -2
  8. diffusers/configuration_utils.py +11 -2
  9. diffusers/dependency_versions_check.py +1 -1
  10. diffusers/dependency_versions_table.py +3 -3
  11. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  12. diffusers/guiders/__init__.py +41 -0
  13. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  14. diffusers/guiders/auto_guidance.py +190 -0
  15. diffusers/guiders/classifier_free_guidance.py +141 -0
  16. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  17. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  18. diffusers/guiders/guider_utils.py +309 -0
  19. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  20. diffusers/guiders/skip_layer_guidance.py +262 -0
  21. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  22. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  23. diffusers/hooks/__init__.py +17 -0
  24. diffusers/hooks/_common.py +56 -0
  25. diffusers/hooks/_helpers.py +293 -0
  26. diffusers/hooks/faster_cache.py +9 -8
  27. diffusers/hooks/first_block_cache.py +259 -0
  28. diffusers/hooks/group_offloading.py +332 -227
  29. diffusers/hooks/hooks.py +58 -3
  30. diffusers/hooks/layer_skip.py +263 -0
  31. diffusers/hooks/layerwise_casting.py +5 -10
  32. diffusers/hooks/pyramid_attention_broadcast.py +15 -12
  33. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  34. diffusers/hooks/utils.py +43 -0
  35. diffusers/image_processor.py +7 -2
  36. diffusers/loaders/__init__.py +10 -0
  37. diffusers/loaders/ip_adapter.py +260 -18
  38. diffusers/loaders/lora_base.py +261 -127
  39. diffusers/loaders/lora_conversion_utils.py +657 -35
  40. diffusers/loaders/lora_pipeline.py +2778 -1246
  41. diffusers/loaders/peft.py +78 -112
  42. diffusers/loaders/single_file.py +2 -2
  43. diffusers/loaders/single_file_model.py +64 -15
  44. diffusers/loaders/single_file_utils.py +395 -7
  45. diffusers/loaders/textual_inversion.py +3 -2
  46. diffusers/loaders/transformer_flux.py +10 -11
  47. diffusers/loaders/transformer_sd3.py +8 -3
  48. diffusers/loaders/unet.py +24 -21
  49. diffusers/loaders/unet_loader_utils.py +6 -3
  50. diffusers/loaders/utils.py +1 -1
  51. diffusers/models/__init__.py +23 -1
  52. diffusers/models/activations.py +5 -5
  53. diffusers/models/adapter.py +2 -3
  54. diffusers/models/attention.py +488 -7
  55. diffusers/models/attention_dispatch.py +1218 -0
  56. diffusers/models/attention_flax.py +10 -10
  57. diffusers/models/attention_processor.py +113 -667
  58. diffusers/models/auto_model.py +49 -12
  59. diffusers/models/autoencoders/__init__.py +2 -0
  60. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  61. diffusers/models/autoencoders/autoencoder_dc.py +17 -4
  62. diffusers/models/autoencoders/autoencoder_kl.py +5 -5
  63. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  64. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  65. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
  66. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  67. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  68. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  69. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  70. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  71. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  72. diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
  73. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  74. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  75. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  76. diffusers/models/autoencoders/vae.py +13 -2
  77. diffusers/models/autoencoders/vq_model.py +2 -2
  78. diffusers/models/cache_utils.py +32 -10
  79. diffusers/models/controlnet.py +1 -1
  80. diffusers/models/controlnet_flux.py +1 -1
  81. diffusers/models/controlnet_sd3.py +1 -1
  82. diffusers/models/controlnet_sparsectrl.py +1 -1
  83. diffusers/models/controlnets/__init__.py +1 -0
  84. diffusers/models/controlnets/controlnet.py +3 -3
  85. diffusers/models/controlnets/controlnet_flax.py +1 -1
  86. diffusers/models/controlnets/controlnet_flux.py +21 -20
  87. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  88. diffusers/models/controlnets/controlnet_sana.py +290 -0
  89. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  90. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  91. diffusers/models/controlnets/controlnet_union.py +5 -5
  92. diffusers/models/controlnets/controlnet_xs.py +7 -7
  93. diffusers/models/controlnets/multicontrolnet.py +4 -5
  94. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  95. diffusers/models/downsampling.py +2 -2
  96. diffusers/models/embeddings.py +36 -46
  97. diffusers/models/embeddings_flax.py +2 -2
  98. diffusers/models/lora.py +3 -3
  99. diffusers/models/model_loading_utils.py +233 -1
  100. diffusers/models/modeling_flax_utils.py +1 -2
  101. diffusers/models/modeling_utils.py +203 -108
  102. diffusers/models/normalization.py +4 -4
  103. diffusers/models/resnet.py +2 -2
  104. diffusers/models/resnet_flax.py +1 -1
  105. diffusers/models/transformers/__init__.py +7 -0
  106. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  107. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  108. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  109. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  110. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  111. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  112. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  113. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  114. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  115. diffusers/models/transformers/prior_transformer.py +1 -1
  116. diffusers/models/transformers/sana_transformer.py +8 -3
  117. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  118. diffusers/models/transformers/t5_film_transformer.py +3 -3
  119. diffusers/models/transformers/transformer_2d.py +1 -1
  120. diffusers/models/transformers/transformer_allegro.py +1 -1
  121. diffusers/models/transformers/transformer_chroma.py +641 -0
  122. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  123. diffusers/models/transformers/transformer_cogview4.py +353 -27
  124. diffusers/models/transformers/transformer_cosmos.py +586 -0
  125. diffusers/models/transformers/transformer_flux.py +376 -138
  126. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  127. diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
  128. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  129. diffusers/models/transformers/transformer_ltx.py +105 -24
  130. diffusers/models/transformers/transformer_lumina2.py +1 -1
  131. diffusers/models/transformers/transformer_mochi.py +1 -1
  132. diffusers/models/transformers/transformer_omnigen.py +2 -2
  133. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  134. diffusers/models/transformers/transformer_sd3.py +7 -7
  135. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  136. diffusers/models/transformers/transformer_temporal.py +1 -1
  137. diffusers/models/transformers/transformer_wan.py +316 -87
  138. diffusers/models/transformers/transformer_wan_vace.py +387 -0
  139. diffusers/models/unets/unet_1d.py +1 -1
  140. diffusers/models/unets/unet_1d_blocks.py +1 -1
  141. diffusers/models/unets/unet_2d.py +1 -1
  142. diffusers/models/unets/unet_2d_blocks.py +1 -1
  143. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  144. diffusers/models/unets/unet_2d_condition.py +4 -3
  145. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  146. diffusers/models/unets/unet_3d_blocks.py +1 -1
  147. diffusers/models/unets/unet_3d_condition.py +3 -3
  148. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  149. diffusers/models/unets/unet_kandinsky3.py +1 -1
  150. diffusers/models/unets/unet_motion_model.py +2 -2
  151. diffusers/models/unets/unet_stable_cascade.py +1 -1
  152. diffusers/models/upsampling.py +2 -2
  153. diffusers/models/vae_flax.py +2 -2
  154. diffusers/models/vq_model.py +1 -1
  155. diffusers/modular_pipelines/__init__.py +83 -0
  156. diffusers/modular_pipelines/components_manager.py +1068 -0
  157. diffusers/modular_pipelines/flux/__init__.py +66 -0
  158. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  159. diffusers/modular_pipelines/flux/decoders.py +109 -0
  160. diffusers/modular_pipelines/flux/denoise.py +227 -0
  161. diffusers/modular_pipelines/flux/encoders.py +412 -0
  162. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  163. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  164. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  165. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  166. diffusers/modular_pipelines/node_utils.py +665 -0
  167. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  168. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  169. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  170. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  171. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  172. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  173. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  174. diffusers/modular_pipelines/wan/__init__.py +66 -0
  175. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  176. diffusers/modular_pipelines/wan/decoders.py +105 -0
  177. diffusers/modular_pipelines/wan/denoise.py +261 -0
  178. diffusers/modular_pipelines/wan/encoders.py +242 -0
  179. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  180. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  181. diffusers/pipelines/__init__.py +68 -6
  182. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  183. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  184. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  185. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  186. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  187. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  188. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  189. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  190. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  191. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  192. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  193. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  194. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
  195. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  196. diffusers/pipelines/auto_pipeline.py +23 -20
  197. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  198. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  199. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  200. diffusers/pipelines/chroma/__init__.py +49 -0
  201. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  202. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  203. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  204. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
  205. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
  206. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
  207. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
  208. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  209. diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
  210. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  211. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  212. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  213. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  214. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  215. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
  216. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  217. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  218. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  219. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  220. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  221. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
  222. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
  223. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
  224. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  225. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  226. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  227. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  228. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  229. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  230. diffusers/pipelines/cosmos/__init__.py +54 -0
  231. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  232. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  233. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  234. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  235. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  236. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  237. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  238. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  239. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  240. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  241. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  242. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  243. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  244. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  245. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  246. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  247. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  248. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  249. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  250. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  251. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  252. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  253. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  254. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  255. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  256. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  257. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
  258. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  259. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  260. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  261. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  262. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  263. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  264. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  265. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  266. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  267. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  268. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  269. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  270. diffusers/pipelines/dit/pipeline_dit.py +4 -2
  271. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  272. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  273. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  274. diffusers/pipelines/flux/__init__.py +4 -0
  275. diffusers/pipelines/flux/modeling_flux.py +1 -1
  276. diffusers/pipelines/flux/pipeline_flux.py +37 -36
  277. diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
  278. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
  279. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
  280. diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
  281. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
  282. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
  283. diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
  284. diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
  285. diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
  286. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  287. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  288. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
  289. diffusers/pipelines/flux/pipeline_output.py +6 -4
  290. diffusers/pipelines/free_init_utils.py +2 -2
  291. diffusers/pipelines/free_noise_utils.py +3 -3
  292. diffusers/pipelines/hidream_image/__init__.py +47 -0
  293. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  294. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  295. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  296. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  297. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
  298. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  299. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  300. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  301. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  302. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  303. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  304. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  305. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  306. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  307. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  308. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  309. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  310. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  311. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  312. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  313. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  314. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  315. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  316. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  317. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  318. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  319. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  320. diffusers/pipelines/kolors/text_encoder.py +3 -3
  321. diffusers/pipelines/kolors/tokenizer.py +1 -1
  322. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  323. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  324. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  325. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  326. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  327. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  328. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  329. diffusers/pipelines/ltx/__init__.py +4 -0
  330. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  331. diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
  332. diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
  333. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
  334. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  335. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  336. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  337. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  338. diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
  339. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  340. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  341. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  342. diffusers/pipelines/onnx_utils.py +15 -2
  343. diffusers/pipelines/pag/pag_utils.py +2 -2
  344. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  345. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  346. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  347. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  348. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  349. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  350. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  351. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  352. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  353. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  354. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  355. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  356. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  357. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  358. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  359. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  360. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  361. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  362. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  363. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  364. diffusers/pipelines/pipeline_flax_utils.py +5 -6
  365. diffusers/pipelines/pipeline_loading_utils.py +113 -15
  366. diffusers/pipelines/pipeline_utils.py +127 -48
  367. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
  368. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
  369. diffusers/pipelines/qwenimage/__init__.py +55 -0
  370. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  371. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  372. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
  373. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  374. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  375. diffusers/pipelines/sana/__init__.py +4 -0
  376. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  377. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  378. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  379. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  380. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  381. diffusers/pipelines/shap_e/camera.py +1 -1
  382. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  383. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  384. diffusers/pipelines/shap_e/renderer.py +3 -3
  385. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  386. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  387. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  388. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  389. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  390. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  391. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  392. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  393. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  394. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  395. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  396. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  397. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  398. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  399. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  400. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  401. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  402. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  403. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
  404. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  405. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
  406. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
  407. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
  408. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  409. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  410. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  411. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  412. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  413. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  414. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  415. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  416. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  417. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  418. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  419. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  420. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
  421. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  422. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  423. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  424. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  425. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  426. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  427. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  428. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  429. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  430. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  431. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  432. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  433. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  434. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  435. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  436. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  437. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  438. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  439. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  440. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  441. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  442. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  443. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  444. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  445. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  446. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  447. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  448. diffusers/pipelines/unclip/text_proj.py +2 -2
  449. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  450. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  451. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  452. diffusers/pipelines/visualcloze/__init__.py +52 -0
  453. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  454. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  455. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  456. diffusers/pipelines/wan/__init__.py +2 -0
  457. diffusers/pipelines/wan/pipeline_wan.py +91 -30
  458. diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
  459. diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
  460. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  461. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  462. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  463. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  464. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  465. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  466. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  467. diffusers/quantizers/__init__.py +3 -1
  468. diffusers/quantizers/base.py +17 -1
  469. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  470. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  471. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  472. diffusers/quantizers/gguf/utils.py +108 -16
  473. diffusers/quantizers/pipe_quant_config.py +202 -0
  474. diffusers/quantizers/quantization_config.py +18 -16
  475. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  476. diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
  477. diffusers/schedulers/__init__.py +3 -1
  478. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  479. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  480. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  481. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  482. diffusers/schedulers/scheduling_ddim.py +8 -8
  483. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  484. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  485. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  486. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  487. diffusers/schedulers/scheduling_ddpm.py +9 -9
  488. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  489. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  490. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  491. diffusers/schedulers/scheduling_deis_multistep.py +16 -9
  492. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  493. diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
  494. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  495. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  496. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  497. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
  498. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  499. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  500. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  501. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  502. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  503. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  504. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  505. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  506. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  507. diffusers/schedulers/scheduling_ipndm.py +2 -2
  508. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  509. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  510. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  511. diffusers/schedulers/scheduling_lcm.py +3 -3
  512. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  513. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  514. diffusers/schedulers/scheduling_pndm.py +4 -4
  515. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  516. diffusers/schedulers/scheduling_repaint.py +9 -9
  517. diffusers/schedulers/scheduling_sasolver.py +15 -15
  518. diffusers/schedulers/scheduling_scm.py +1 -2
  519. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  520. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  521. diffusers/schedulers/scheduling_tcd.py +3 -3
  522. diffusers/schedulers/scheduling_unclip.py +5 -5
  523. diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
  524. diffusers/schedulers/scheduling_utils.py +3 -3
  525. diffusers/schedulers/scheduling_utils_flax.py +2 -2
  526. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  527. diffusers/training_utils.py +91 -5
  528. diffusers/utils/__init__.py +15 -0
  529. diffusers/utils/accelerate_utils.py +1 -1
  530. diffusers/utils/constants.py +4 -0
  531. diffusers/utils/doc_utils.py +1 -1
  532. diffusers/utils/dummy_pt_objects.py +432 -0
  533. diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
  534. diffusers/utils/dynamic_modules_utils.py +85 -8
  535. diffusers/utils/export_utils.py +1 -1
  536. diffusers/utils/hub_utils.py +33 -17
  537. diffusers/utils/import_utils.py +151 -18
  538. diffusers/utils/logging.py +1 -1
  539. diffusers/utils/outputs.py +2 -1
  540. diffusers/utils/peft_utils.py +96 -10
  541. diffusers/utils/state_dict_utils.py +20 -3
  542. diffusers/utils/testing_utils.py +195 -17
  543. diffusers/utils/torch_utils.py +43 -5
  544. diffusers/video_processor.py +2 -2
  545. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
  546. diffusers-0.35.0.dist-info/RECORD +703 -0
  547. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
  548. diffusers-0.33.1.dist-info/RECORD +0 -608
  549. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
  550. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
  551. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
@@ -177,7 +177,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
177
177
  r"""
178
178
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
179
179
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
180
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
180
+ Flawed](https://huggingface.co/papers/2305.08891).
181
181
 
182
182
  Args:
183
183
  noise_cfg (`torch.Tensor`):
@@ -515,7 +515,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
515
515
  def prepare_extra_step_kwargs(self, generator, eta):
516
516
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
517
517
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
518
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
518
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
519
519
  # and should be between [0, 1]
520
520
 
521
521
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -653,7 +653,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
653
653
  return self._guidance_rescale
654
654
 
655
655
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
656
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
656
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
657
657
  # corresponds to doing no classifier free guidance.
658
658
  @property
659
659
  def do_classifier_free_guidance(self):
@@ -956,7 +956,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
956
956
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
957
957
 
958
958
  if self.do_classifier_free_guidance and guidance_rescale > 0.0:
959
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
959
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
960
960
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
961
961
 
962
962
  # compute the previous noisy sample x_t -> x_t-1
@@ -199,7 +199,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
199
199
  r"""
200
200
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
201
201
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
202
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
202
+ Flawed](https://huggingface.co/papers/2305.08891).
203
203
 
204
204
  Args:
205
205
  noise_cfg (`torch.Tensor`):
@@ -557,7 +557,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
557
557
  def prepare_extra_step_kwargs(self, generator, eta):
558
558
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
559
559
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
560
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
560
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
561
561
  # and should be between [0, 1]
562
562
 
563
563
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -771,7 +771,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
771
771
  return self._guidance_rescale
772
772
 
773
773
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
774
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
774
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
775
775
  # corresponds to doing no classifier free guidance.
776
776
  @property
777
777
  def do_classifier_free_guidance(self):
@@ -849,7 +849,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
849
849
  num_images_per_prompt (`int`, *optional*, defaults to 1):
850
850
  The number of images to generate per prompt.
851
851
  eta (`float`, *optional*, defaults to 0.0):
852
- A parameter defined in the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies to the
852
+ A parameter defined in the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only applies to the
853
853
  [`~schedulers.DDIMScheduler`] and is ignored in other schedulers. It adjusts noise level during the
854
854
  inference process.
855
855
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -883,7 +883,8 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
883
883
  inputs will be passed, facilitating enhanced logging or monitoring of the generation process.
884
884
  guidance_rescale (`float`, *optional*, defaults to 0.0):
885
885
  Rescale parameter for adjusting noise configuration based on guidance rescale. Based on findings from
886
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
886
+ [Common Diffusion Noise Schedules and Sample Steps are
887
+ Flawed](https://huggingface.co/papers/2305.08891).
887
888
  strength (`float`, *optional*, defaults to 1.0):
888
889
  Affects the overall styling or quality of the generated output. Values closer to 1 usually provide
889
890
  direct adherence to prompts.
@@ -1180,7 +1181,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
1180
1181
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
1181
1182
 
1182
1183
  if self.do_classifier_free_guidance and guidance_rescale > 0.0:
1183
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1184
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1184
1185
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
1185
1186
 
1186
1187
  # compute the previous noisy sample x_t -> x_t-1
@@ -33,6 +33,8 @@ else:
33
33
  _import_structure["pipeline_flux_fill"] = ["FluxFillPipeline"]
34
34
  _import_structure["pipeline_flux_img2img"] = ["FluxImg2ImgPipeline"]
35
35
  _import_structure["pipeline_flux_inpaint"] = ["FluxInpaintPipeline"]
36
+ _import_structure["pipeline_flux_kontext"] = ["FluxKontextPipeline"]
37
+ _import_structure["pipeline_flux_kontext_inpaint"] = ["FluxKontextInpaintPipeline"]
36
38
  _import_structure["pipeline_flux_prior_redux"] = ["FluxPriorReduxPipeline"]
37
39
  if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
38
40
  try:
@@ -52,6 +54,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
52
54
  from .pipeline_flux_fill import FluxFillPipeline
53
55
  from .pipeline_flux_img2img import FluxImg2ImgPipeline
54
56
  from .pipeline_flux_inpaint import FluxInpaintPipeline
57
+ from .pipeline_flux_kontext import FluxKontextPipeline
58
+ from .pipeline_flux_kontext_inpaint import FluxKontextInpaintPipeline
55
59
  from .pipeline_flux_prior_redux import FluxPriorReduxPipeline
56
60
  else:
57
61
  import sys
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -310,7 +310,7 @@ class FluxPipeline(
310
310
  def encode_prompt(
311
311
  self,
312
312
  prompt: Union[str, List[str]],
313
- prompt_2: Union[str, List[str]],
313
+ prompt_2: Optional[Union[str, List[str]]] = None,
314
314
  device: Optional[torch.device] = None,
315
315
  num_images_per_prompt: int = 1,
316
316
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -490,14 +490,6 @@ class FluxPipeline(
490
490
  f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
491
491
  )
492
492
 
493
- if prompt_embeds is not None and negative_prompt_embeds is not None:
494
- if prompt_embeds.shape != negative_prompt_embeds.shape:
495
- raise ValueError(
496
- "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
497
- f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
498
- f" {negative_prompt_embeds.shape}."
499
- )
500
-
501
493
  if prompt_embeds is not None and pooled_prompt_embeds is None:
502
494
  raise ValueError(
503
495
  "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
@@ -682,7 +674,8 @@ class FluxPipeline(
682
674
  The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
683
675
  `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders.
684
676
  true_cfg_scale (`float`, *optional*, defaults to 1.0):
685
- When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
677
+ True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and
678
+ `negative_prompt` is provided.
686
679
  height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
687
680
  The height in pixels of the generated image. This is set to 1024 by default for the best results.
688
681
  width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -695,11 +688,11 @@ class FluxPipeline(
695
688
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
696
689
  will be used.
697
690
  guidance_scale (`float`, *optional*, defaults to 3.5):
698
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
699
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
700
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
701
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
702
- usually at the expense of lower image quality.
691
+ Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
692
+ a model to generate images more aligned with `prompt` at the expense of lower image quality.
693
+
694
+ Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to
695
+ the [paper](https://huggingface.co/papers/2210.03142) to learn more.
703
696
  num_images_per_prompt (`int`, *optional*, defaults to 1):
704
697
  The number of images to generate per prompt.
705
698
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -708,7 +701,7 @@ class FluxPipeline(
708
701
  latents (`torch.FloatTensor`, *optional*):
709
702
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
710
703
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
711
- tensor will ge generated by sampling using the supplied random `generator`.
704
+ tensor will be generated by sampling using the supplied random `generator`.
712
705
  prompt_embeds (`torch.FloatTensor`, *optional*):
713
706
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
714
707
  provided, text embeddings will be generated from `prompt` input argument.
@@ -821,7 +814,7 @@ class FluxPipeline(
821
814
  (
822
815
  negative_prompt_embeds,
823
816
  negative_pooled_prompt_embeds,
824
- _,
817
+ negative_text_ids,
825
818
  ) = self.encode_prompt(
826
819
  prompt=negative_prompt,
827
820
  prompt_2=negative_prompt_2,
@@ -848,6 +841,8 @@ class FluxPipeline(
848
841
 
849
842
  # 5. Prepare timesteps
850
843
  sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps) if sigmas is None else sigmas
844
+ if hasattr(self.scheduler.config, "use_flow_sigmas") and self.scheduler.config.use_flow_sigmas:
845
+ sigmas = None
851
846
  image_seq_len = latents.shape[1]
852
847
  mu = calculate_shift(
853
848
  image_seq_len,
@@ -906,6 +901,9 @@ class FluxPipeline(
906
901
  )
907
902
 
908
903
  # 6. Denoising loop
904
+ # We set the index here to remove DtoH sync, helpful especially during compilation.
905
+ # Check out more details here: https://github.com/huggingface/diffusers/pull/11696
906
+ self.scheduler.set_begin_index(0)
909
907
  with self.progress_bar(total=num_inference_steps) as progress_bar:
910
908
  for i, t in enumerate(timesteps):
911
909
  if self.interrupt:
@@ -917,32 +915,35 @@ class FluxPipeline(
917
915
  # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
918
916
  timestep = t.expand(latents.shape[0]).to(latents.dtype)
919
917
 
920
- noise_pred = self.transformer(
921
- hidden_states=latents,
922
- timestep=timestep / 1000,
923
- guidance=guidance,
924
- pooled_projections=pooled_prompt_embeds,
925
- encoder_hidden_states=prompt_embeds,
926
- txt_ids=text_ids,
927
- img_ids=latent_image_ids,
928
- joint_attention_kwargs=self.joint_attention_kwargs,
929
- return_dict=False,
930
- )[0]
931
-
932
- if do_true_cfg:
933
- if negative_image_embeds is not None:
934
- self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
935
- neg_noise_pred = self.transformer(
918
+ with self.transformer.cache_context("cond"):
919
+ noise_pred = self.transformer(
936
920
  hidden_states=latents,
937
921
  timestep=timestep / 1000,
938
922
  guidance=guidance,
939
- pooled_projections=negative_pooled_prompt_embeds,
940
- encoder_hidden_states=negative_prompt_embeds,
923
+ pooled_projections=pooled_prompt_embeds,
924
+ encoder_hidden_states=prompt_embeds,
941
925
  txt_ids=text_ids,
942
926
  img_ids=latent_image_ids,
943
927
  joint_attention_kwargs=self.joint_attention_kwargs,
944
928
  return_dict=False,
945
929
  )[0]
930
+
931
+ if do_true_cfg:
932
+ if negative_image_embeds is not None:
933
+ self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
934
+
935
+ with self.transformer.cache_context("uncond"):
936
+ neg_noise_pred = self.transformer(
937
+ hidden_states=latents,
938
+ timestep=timestep / 1000,
939
+ guidance=guidance,
940
+ pooled_projections=negative_pooled_prompt_embeds,
941
+ encoder_hidden_states=negative_prompt_embeds,
942
+ txt_ids=negative_text_ids,
943
+ img_ids=latent_image_ids,
944
+ joint_attention_kwargs=self.joint_attention_kwargs,
945
+ return_dict=False,
946
+ )[0]
946
947
  noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)
947
948
 
948
949
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -163,9 +163,9 @@ class FluxControlPipeline(
163
163
  TextualInversionLoaderMixin,
164
164
  ):
165
165
  r"""
166
- The Flux pipeline for controllable text-to-image generation.
166
+ The Flux pipeline for controllable text-to-image generation with image conditions.
167
167
 
168
- Reference: https://blackforestlabs.ai/announcing-black-forest-labs/
168
+ Reference: https://bfl.ai/flux-1-tools
169
169
 
170
170
  Args:
171
171
  transformer ([`FluxTransformer2DModel`]):
@@ -324,7 +324,7 @@ class FluxControlPipeline(
324
324
  def encode_prompt(
325
325
  self,
326
326
  prompt: Union[str, List[str]],
327
- prompt_2: Union[str, List[str]],
327
+ prompt_2: Optional[Union[str, List[str]]] = None,
328
328
  device: Optional[torch.device] = None,
329
329
  num_images_per_prompt: int = 1,
330
330
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -661,11 +661,11 @@ class FluxControlPipeline(
661
661
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
662
662
  will be used.
663
663
  guidance_scale (`float`, *optional*, defaults to 3.5):
664
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
665
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
666
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
667
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
668
- usually at the expense of lower image quality.
664
+ Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
665
+ a model to generate images more aligned with prompt at the expense of lower image quality.
666
+
667
+ Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to
668
+ the [paper](https://huggingface.co/papers/2210.03142) to learn more.
669
669
  num_images_per_prompt (`int`, *optional*, defaults to 1):
670
670
  The number of images to generate per prompt.
671
671
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -335,7 +335,7 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
335
335
  def encode_prompt(
336
336
  self,
337
337
  prompt: Union[str, List[str]],
338
- prompt_2: Union[str, List[str]],
338
+ prompt_2: Optional[Union[str, List[str]]] = None,
339
339
  device: Optional[torch.device] = None,
340
340
  num_images_per_prompt: int = 1,
341
341
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -699,11 +699,11 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
699
699
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
700
700
  will be used.
701
701
  guidance_scale (`float`, *optional*, defaults to 7.0):
702
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
703
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
704
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
705
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
706
- usually at the expense of lower image quality.
702
+ Guidance scale as defined in [Classifier-Free Diffusion
703
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
704
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
705
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
706
+ the text `prompt`, usually at the expense of lower image quality.
707
707
  num_images_per_prompt (`int`, *optional*, defaults to 1):
708
708
  The number of images to generate per prompt.
709
709
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -374,7 +374,7 @@ class FluxControlInpaintPipeline(
374
374
  def encode_prompt(
375
375
  self,
376
376
  prompt: Union[str, List[str]],
377
- prompt_2: Union[str, List[str]],
377
+ prompt_2: Optional[Union[str, List[str]]] = None,
378
378
  device: Optional[torch.device] = None,
379
379
  num_images_per_prompt: int = 1,
380
380
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -857,11 +857,11 @@ class FluxControlInpaintPipeline(
857
857
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
858
858
  will be used.
859
859
  guidance_scale (`float`, *optional*, defaults to 7.0):
860
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
861
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
862
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
863
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
864
- usually at the expense of lower image quality.
860
+ Guidance scale as defined in [Classifier-Free Diffusion
861
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
862
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
863
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
864
+ the text `prompt`, usually at the expense of lower image quality.
865
865
  num_images_per_prompt (`int`, *optional*, defaults to 1):
866
866
  The number of images to generate per prompt.
867
867
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -341,7 +341,7 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF
341
341
  def encode_prompt(
342
342
  self,
343
343
  prompt: Union[str, List[str]],
344
- prompt_2: Union[str, List[str]],
344
+ prompt_2: Optional[Union[str, List[str]]] = None,
345
345
  device: Optional[torch.device] = None,
346
346
  num_images_per_prompt: int = 1,
347
347
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -733,11 +733,11 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF
733
733
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
734
734
  will be used.
735
735
  guidance_scale (`float`, *optional*, defaults to 7.0):
736
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
737
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
738
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
739
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
740
- usually at the expense of lower image quality.
736
+ Guidance scale as defined in [Classifier-Free Diffusion
737
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
738
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
739
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
740
+ the text `prompt`, usually at the expense of lower image quality.
741
741
  control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
742
742
  The percentage of total steps at which the ControlNet starts applying.
743
743
  control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
@@ -335,7 +335,7 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
335
335
  def encode_prompt(
336
336
  self,
337
337
  prompt: Union[str, List[str]],
338
- prompt_2: Union[str, List[str]],
338
+ prompt_2: Optional[Union[str, List[str]]] = None,
339
339
  device: Optional[torch.device] = None,
340
340
  num_images_per_prompt: int = 1,
341
341
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -687,7 +687,8 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
687
687
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
688
688
  will be used.
689
689
  guidance_scale (`float`, *optional*, defaults to 7.0):
690
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
690
+ Guidance scale as defined in [Classifier-Free Diffusion
691
+ Guidance](https://huggingface.co/papers/2207.12598).
691
692
  control_mode (`int` or `List[int]`, *optional*):
692
693
  The mode for the ControlNet. If multiple ControlNets are used, this should be a list.
693
694
  controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
@@ -800,17 +801,20 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
800
801
  )
801
802
  height, width = control_image.shape[-2:]
802
803
 
803
- control_image = retrieve_latents(self.vae.encode(control_image), generator=generator)
804
- control_image = (control_image - self.vae.config.shift_factor) * self.vae.config.scaling_factor
804
+ # xlab controlnet has an input_hint_block and instantx controlnet does not
805
+ controlnet_blocks_repeat = False if self.controlnet.input_hint_block is None else True
806
+ if self.controlnet.input_hint_block is None:
807
+ control_image = retrieve_latents(self.vae.encode(control_image), generator=generator)
808
+ control_image = (control_image - self.vae.config.shift_factor) * self.vae.config.scaling_factor
805
809
 
806
- height_control_image, width_control_image = control_image.shape[2:]
807
- control_image = self._pack_latents(
808
- control_image,
809
- batch_size * num_images_per_prompt,
810
- num_channels_latents,
811
- height_control_image,
812
- width_control_image,
813
- )
810
+ height_control_image, width_control_image = control_image.shape[2:]
811
+ control_image = self._pack_latents(
812
+ control_image,
813
+ batch_size * num_images_per_prompt,
814
+ num_channels_latents,
815
+ height_control_image,
816
+ width_control_image,
817
+ )
814
818
 
815
819
  if control_mode is not None:
816
820
  control_mode = torch.tensor(control_mode).to(device, dtype=torch.long)
@@ -819,7 +823,9 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
819
823
  elif isinstance(self.controlnet, FluxMultiControlNetModel):
820
824
  control_images = []
821
825
 
822
- for control_image_ in control_image:
826
+ # xlab controlnet has an input_hint_block and instantx controlnet does not
827
+ controlnet_blocks_repeat = False if self.controlnet.nets[0].input_hint_block is None else True
828
+ for i, control_image_ in enumerate(control_image):
823
829
  control_image_ = self.prepare_image(
824
830
  image=control_image_,
825
831
  width=width,
@@ -831,17 +837,18 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
831
837
  )
832
838
  height, width = control_image_.shape[-2:]
833
839
 
834
- control_image_ = retrieve_latents(self.vae.encode(control_image_), generator=generator)
835
- control_image_ = (control_image_ - self.vae.config.shift_factor) * self.vae.config.scaling_factor
840
+ if self.controlnet.nets[0].input_hint_block is None:
841
+ control_image_ = retrieve_latents(self.vae.encode(control_image_), generator=generator)
842
+ control_image_ = (control_image_ - self.vae.config.shift_factor) * self.vae.config.scaling_factor
836
843
 
837
- height_control_image, width_control_image = control_image_.shape[2:]
838
- control_image_ = self._pack_latents(
839
- control_image_,
840
- batch_size * num_images_per_prompt,
841
- num_channels_latents,
842
- height_control_image,
843
- width_control_image,
844
- )
844
+ height_control_image, width_control_image = control_image_.shape[2:]
845
+ control_image_ = self._pack_latents(
846
+ control_image_,
847
+ batch_size * num_images_per_prompt,
848
+ num_channels_latents,
849
+ height_control_image,
850
+ width_control_image,
851
+ )
845
852
 
846
853
  control_images.append(control_image_)
847
854
 
@@ -955,6 +962,7 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
955
962
  img_ids=latent_image_ids,
956
963
  joint_attention_kwargs=self.joint_attention_kwargs,
957
964
  return_dict=False,
965
+ controlnet_blocks_repeat=controlnet_blocks_repeat,
958
966
  )[0]
959
967
 
960
968
  latents_dtype = latents.dtype
@@ -346,7 +346,7 @@ class FluxControlNetInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
346
346
  def encode_prompt(
347
347
  self,
348
348
  prompt: Union[str, List[str]],
349
- prompt_2: Union[str, List[str]],
349
+ prompt_2: Optional[Union[str, List[str]]] = None,
350
350
  device: Optional[torch.device] = None,
351
351
  num_images_per_prompt: int = 1,
352
352
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -801,7 +801,8 @@ class FluxControlNetInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
801
801
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
802
802
  will be used.
803
803
  guidance_scale (`float`, *optional*, defaults to 7.0):
804
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
804
+ Guidance scale as defined in [Classifier-Free Diffusion
805
+ Guidance](https://huggingface.co/papers/2207.12598).
805
806
  control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
806
807
  The percentage of total steps at which the ControlNet starts applying.
807
808
  control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -419,7 +419,7 @@ class FluxFillPipeline(
419
419
  def encode_prompt(
420
420
  self,
421
421
  prompt: Union[str, List[str]],
422
- prompt_2: Union[str, List[str]],
422
+ prompt_2: Optional[Union[str, List[str]]] = None,
423
423
  device: Optional[torch.device] = None,
424
424
  num_images_per_prompt: int = 1,
425
425
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -794,11 +794,11 @@ class FluxFillPipeline(
794
794
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
795
795
  will be used.
796
796
  guidance_scale (`float`, *optional*, defaults to 30.0):
797
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
798
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
799
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
800
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
801
- usually at the expense of lower image quality.
797
+ Guidance scale as defined in [Classifier-Free Diffusion
798
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
799
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
800
+ `guidance_scale > 1`. A higher guidance scale encourages generation of images that are closely linked to
801
+ the text `prompt`, usually at the expense of lower image quality.
802
802
  num_images_per_prompt (`int`, *optional*, defaults to 1):
803
803
  The number of images to generate per prompt.
804
804
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Black Forest Labs and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -333,7 +333,7 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
333
333
  def encode_prompt(
334
334
  self,
335
335
  prompt: Union[str, List[str]],
336
- prompt_2: Union[str, List[str]],
336
+ prompt_2: Optional[Union[str, List[str]]] = None,
337
337
  device: Optional[torch.device] = None,
338
338
  num_images_per_prompt: int = 1,
339
339
  prompt_embeds: Optional[torch.FloatTensor] = None,
@@ -607,6 +607,39 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
607
607
 
608
608
  return latents
609
609
 
610
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_slicing
611
+ def enable_vae_slicing(self):
612
+ r"""
613
+ Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
614
+ compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
615
+ """
616
+ self.vae.enable_slicing()
617
+
618
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_slicing
619
+ def disable_vae_slicing(self):
620
+ r"""
621
+ Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
622
+ computing decoding in one step.
623
+ """
624
+ self.vae.disable_slicing()
625
+
626
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_tiling
627
+ def enable_vae_tiling(self):
628
+ r"""
629
+ Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
630
+ compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
631
+ processing larger images.
632
+ """
633
+ self.vae.enable_tiling()
634
+
635
+ # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
636
+ def disable_vae_tiling(self):
637
+ r"""
638
+ Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
639
+ computing decoding in one step.
640
+ """
641
+ self.vae.disable_tiling()
642
+
610
643
  def prepare_latents(
611
644
  self,
612
645
  image,
@@ -741,11 +774,11 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
741
774
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
742
775
  will be used.
743
776
  guidance_scale (`float`, *optional*, defaults to 7.0):
744
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
745
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
746
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
747
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
748
- usually at the expense of lower image quality.
777
+ Guidance scale as defined in [Classifier-Free Diffusion
778
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
779
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
780
+ `guidance_scale > 1`. A higher guidance scale encourages generation of images that are closely linked to
781
+ the text `prompt`, usually at the expense of lower image quality.
749
782
  num_images_per_prompt (`int`, *optional*, defaults to 1):
750
783
  The number of images to generate per prompt.
751
784
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):