diffusers-0.33.1-py3-none-any.whl → diffusers-0.35.0-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (551)
  1. diffusers/__init__.py +145 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/__init__.py +1 -1
  4. diffusers/commands/custom_blocks.py +134 -0
  5. diffusers/commands/diffusers_cli.py +3 -1
  6. diffusers/commands/env.py +1 -1
  7. diffusers/commands/fp16_safetensors.py +2 -2
  8. diffusers/configuration_utils.py +11 -2
  9. diffusers/dependency_versions_check.py +1 -1
  10. diffusers/dependency_versions_table.py +3 -3
  11. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  12. diffusers/guiders/__init__.py +41 -0
  13. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  14. diffusers/guiders/auto_guidance.py +190 -0
  15. diffusers/guiders/classifier_free_guidance.py +141 -0
  16. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  17. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  18. diffusers/guiders/guider_utils.py +309 -0
  19. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  20. diffusers/guiders/skip_layer_guidance.py +262 -0
  21. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  22. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  23. diffusers/hooks/__init__.py +17 -0
  24. diffusers/hooks/_common.py +56 -0
  25. diffusers/hooks/_helpers.py +293 -0
  26. diffusers/hooks/faster_cache.py +9 -8
  27. diffusers/hooks/first_block_cache.py +259 -0
  28. diffusers/hooks/group_offloading.py +332 -227
  29. diffusers/hooks/hooks.py +58 -3
  30. diffusers/hooks/layer_skip.py +263 -0
  31. diffusers/hooks/layerwise_casting.py +5 -10
  32. diffusers/hooks/pyramid_attention_broadcast.py +15 -12
  33. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  34. diffusers/hooks/utils.py +43 -0
  35. diffusers/image_processor.py +7 -2
  36. diffusers/loaders/__init__.py +10 -0
  37. diffusers/loaders/ip_adapter.py +260 -18
  38. diffusers/loaders/lora_base.py +261 -127
  39. diffusers/loaders/lora_conversion_utils.py +657 -35
  40. diffusers/loaders/lora_pipeline.py +2778 -1246
  41. diffusers/loaders/peft.py +78 -112
  42. diffusers/loaders/single_file.py +2 -2
  43. diffusers/loaders/single_file_model.py +64 -15
  44. diffusers/loaders/single_file_utils.py +395 -7
  45. diffusers/loaders/textual_inversion.py +3 -2
  46. diffusers/loaders/transformer_flux.py +10 -11
  47. diffusers/loaders/transformer_sd3.py +8 -3
  48. diffusers/loaders/unet.py +24 -21
  49. diffusers/loaders/unet_loader_utils.py +6 -3
  50. diffusers/loaders/utils.py +1 -1
  51. diffusers/models/__init__.py +23 -1
  52. diffusers/models/activations.py +5 -5
  53. diffusers/models/adapter.py +2 -3
  54. diffusers/models/attention.py +488 -7
  55. diffusers/models/attention_dispatch.py +1218 -0
  56. diffusers/models/attention_flax.py +10 -10
  57. diffusers/models/attention_processor.py +113 -667
  58. diffusers/models/auto_model.py +49 -12
  59. diffusers/models/autoencoders/__init__.py +2 -0
  60. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  61. diffusers/models/autoencoders/autoencoder_dc.py +17 -4
  62. diffusers/models/autoencoders/autoencoder_kl.py +5 -5
  63. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  64. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  65. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
  66. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  67. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  68. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  69. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  70. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  71. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  72. diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
  73. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  74. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  75. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  76. diffusers/models/autoencoders/vae.py +13 -2
  77. diffusers/models/autoencoders/vq_model.py +2 -2
  78. diffusers/models/cache_utils.py +32 -10
  79. diffusers/models/controlnet.py +1 -1
  80. diffusers/models/controlnet_flux.py +1 -1
  81. diffusers/models/controlnet_sd3.py +1 -1
  82. diffusers/models/controlnet_sparsectrl.py +1 -1
  83. diffusers/models/controlnets/__init__.py +1 -0
  84. diffusers/models/controlnets/controlnet.py +3 -3
  85. diffusers/models/controlnets/controlnet_flax.py +1 -1
  86. diffusers/models/controlnets/controlnet_flux.py +21 -20
  87. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  88. diffusers/models/controlnets/controlnet_sana.py +290 -0
  89. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  90. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  91. diffusers/models/controlnets/controlnet_union.py +5 -5
  92. diffusers/models/controlnets/controlnet_xs.py +7 -7
  93. diffusers/models/controlnets/multicontrolnet.py +4 -5
  94. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  95. diffusers/models/downsampling.py +2 -2
  96. diffusers/models/embeddings.py +36 -46
  97. diffusers/models/embeddings_flax.py +2 -2
  98. diffusers/models/lora.py +3 -3
  99. diffusers/models/model_loading_utils.py +233 -1
  100. diffusers/models/modeling_flax_utils.py +1 -2
  101. diffusers/models/modeling_utils.py +203 -108
  102. diffusers/models/normalization.py +4 -4
  103. diffusers/models/resnet.py +2 -2
  104. diffusers/models/resnet_flax.py +1 -1
  105. diffusers/models/transformers/__init__.py +7 -0
  106. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  107. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  108. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  109. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  110. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  111. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  112. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  113. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  114. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  115. diffusers/models/transformers/prior_transformer.py +1 -1
  116. diffusers/models/transformers/sana_transformer.py +8 -3
  117. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  118. diffusers/models/transformers/t5_film_transformer.py +3 -3
  119. diffusers/models/transformers/transformer_2d.py +1 -1
  120. diffusers/models/transformers/transformer_allegro.py +1 -1
  121. diffusers/models/transformers/transformer_chroma.py +641 -0
  122. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  123. diffusers/models/transformers/transformer_cogview4.py +353 -27
  124. diffusers/models/transformers/transformer_cosmos.py +586 -0
  125. diffusers/models/transformers/transformer_flux.py +376 -138
  126. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  127. diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
  128. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  129. diffusers/models/transformers/transformer_ltx.py +105 -24
  130. diffusers/models/transformers/transformer_lumina2.py +1 -1
  131. diffusers/models/transformers/transformer_mochi.py +1 -1
  132. diffusers/models/transformers/transformer_omnigen.py +2 -2
  133. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  134. diffusers/models/transformers/transformer_sd3.py +7 -7
  135. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  136. diffusers/models/transformers/transformer_temporal.py +1 -1
  137. diffusers/models/transformers/transformer_wan.py +316 -87
  138. diffusers/models/transformers/transformer_wan_vace.py +387 -0
  139. diffusers/models/unets/unet_1d.py +1 -1
  140. diffusers/models/unets/unet_1d_blocks.py +1 -1
  141. diffusers/models/unets/unet_2d.py +1 -1
  142. diffusers/models/unets/unet_2d_blocks.py +1 -1
  143. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  144. diffusers/models/unets/unet_2d_condition.py +4 -3
  145. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  146. diffusers/models/unets/unet_3d_blocks.py +1 -1
  147. diffusers/models/unets/unet_3d_condition.py +3 -3
  148. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  149. diffusers/models/unets/unet_kandinsky3.py +1 -1
  150. diffusers/models/unets/unet_motion_model.py +2 -2
  151. diffusers/models/unets/unet_stable_cascade.py +1 -1
  152. diffusers/models/upsampling.py +2 -2
  153. diffusers/models/vae_flax.py +2 -2
  154. diffusers/models/vq_model.py +1 -1
  155. diffusers/modular_pipelines/__init__.py +83 -0
  156. diffusers/modular_pipelines/components_manager.py +1068 -0
  157. diffusers/modular_pipelines/flux/__init__.py +66 -0
  158. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  159. diffusers/modular_pipelines/flux/decoders.py +109 -0
  160. diffusers/modular_pipelines/flux/denoise.py +227 -0
  161. diffusers/modular_pipelines/flux/encoders.py +412 -0
  162. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  163. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  164. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  165. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  166. diffusers/modular_pipelines/node_utils.py +665 -0
  167. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  168. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  169. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  170. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  171. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  172. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  173. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  174. diffusers/modular_pipelines/wan/__init__.py +66 -0
  175. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  176. diffusers/modular_pipelines/wan/decoders.py +105 -0
  177. diffusers/modular_pipelines/wan/denoise.py +261 -0
  178. diffusers/modular_pipelines/wan/encoders.py +242 -0
  179. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  180. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  181. diffusers/pipelines/__init__.py +68 -6
  182. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  183. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  184. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  185. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  186. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  187. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  188. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  189. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  190. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  191. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  192. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  193. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  194. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
  195. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  196. diffusers/pipelines/auto_pipeline.py +23 -20
  197. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  198. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  199. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  200. diffusers/pipelines/chroma/__init__.py +49 -0
  201. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  202. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  203. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  204. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
  205. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
  206. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
  207. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
  208. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  209. diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
  210. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  211. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  212. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  213. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  214. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  215. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
  216. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  217. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  218. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  219. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  220. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  221. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
  222. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
  223. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
  224. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  225. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  226. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  227. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  228. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  229. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  230. diffusers/pipelines/cosmos/__init__.py +54 -0
  231. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  232. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  233. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  234. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  235. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  236. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  237. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  238. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  239. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  240. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  241. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  242. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  243. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  244. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  245. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  246. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  247. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  248. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  249. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  250. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  251. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  252. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  253. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  254. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  255. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  256. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  257. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
  258. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  259. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  260. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  261. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  262. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  263. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  264. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  265. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  266. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  267. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  268. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  269. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  270. diffusers/pipelines/dit/pipeline_dit.py +4 -2
  271. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  272. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  273. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  274. diffusers/pipelines/flux/__init__.py +4 -0
  275. diffusers/pipelines/flux/modeling_flux.py +1 -1
  276. diffusers/pipelines/flux/pipeline_flux.py +37 -36
  277. diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
  278. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
  279. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
  280. diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
  281. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
  282. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
  283. diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
  284. diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
  285. diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
  286. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  287. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  288. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
  289. diffusers/pipelines/flux/pipeline_output.py +6 -4
  290. diffusers/pipelines/free_init_utils.py +2 -2
  291. diffusers/pipelines/free_noise_utils.py +3 -3
  292. diffusers/pipelines/hidream_image/__init__.py +47 -0
  293. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  294. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  295. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  296. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  297. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
  298. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  299. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  300. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  301. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  302. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  303. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  304. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  305. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  306. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  307. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  308. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  309. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  310. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  311. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  312. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  313. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  314. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  315. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  316. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  317. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  318. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  319. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  320. diffusers/pipelines/kolors/text_encoder.py +3 -3
  321. diffusers/pipelines/kolors/tokenizer.py +1 -1
  322. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  323. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  324. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  325. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  326. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  327. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  328. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  329. diffusers/pipelines/ltx/__init__.py +4 -0
  330. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  331. diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
  332. diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
  333. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
  334. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  335. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  336. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  337. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  338. diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
  339. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  340. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  341. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  342. diffusers/pipelines/onnx_utils.py +15 -2
  343. diffusers/pipelines/pag/pag_utils.py +2 -2
  344. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  345. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  346. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  347. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  348. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  349. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  350. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  351. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  352. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  353. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  354. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  355. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  356. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  357. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  358. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  359. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  360. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  361. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  362. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  363. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  364. diffusers/pipelines/pipeline_flax_utils.py +5 -6
  365. diffusers/pipelines/pipeline_loading_utils.py +113 -15
  366. diffusers/pipelines/pipeline_utils.py +127 -48
  367. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
  368. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
  369. diffusers/pipelines/qwenimage/__init__.py +55 -0
  370. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  371. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  372. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
  373. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  374. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  375. diffusers/pipelines/sana/__init__.py +4 -0
  376. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  377. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  378. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  379. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  380. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  381. diffusers/pipelines/shap_e/camera.py +1 -1
  382. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  383. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  384. diffusers/pipelines/shap_e/renderer.py +3 -3
  385. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  386. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  387. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  388. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  389. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  390. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  391. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  392. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  393. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  394. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  395. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  396. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  397. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  398. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  399. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  400. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  401. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  402. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  403. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
  404. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  405. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
  406. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
  407. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
  408. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  409. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  410. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  411. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  412. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  413. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  414. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  415. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  416. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  417. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  418. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  419. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  420. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
  421. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  422. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  423. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  424. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  425. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  426. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  427. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  428. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  429. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  430. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  431. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  432. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  433. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  434. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  435. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  436. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  437. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  438. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  439. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  440. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  441. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  442. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  443. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  444. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  445. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  446. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  447. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  448. diffusers/pipelines/unclip/text_proj.py +2 -2
  449. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  450. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  451. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  452. diffusers/pipelines/visualcloze/__init__.py +52 -0
  453. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  454. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  455. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  456. diffusers/pipelines/wan/__init__.py +2 -0
  457. diffusers/pipelines/wan/pipeline_wan.py +91 -30
  458. diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
  459. diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
  460. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  461. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  462. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  463. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  464. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  465. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  466. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  467. diffusers/quantizers/__init__.py +3 -1
  468. diffusers/quantizers/base.py +17 -1
  469. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  470. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  471. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  472. diffusers/quantizers/gguf/utils.py +108 -16
  473. diffusers/quantizers/pipe_quant_config.py +202 -0
  474. diffusers/quantizers/quantization_config.py +18 -16
  475. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  476. diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
  477. diffusers/schedulers/__init__.py +3 -1
  478. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  479. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  480. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  481. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  482. diffusers/schedulers/scheduling_ddim.py +8 -8
  483. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  484. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  485. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  486. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  487. diffusers/schedulers/scheduling_ddpm.py +9 -9
  488. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  489. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  490. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  491. diffusers/schedulers/scheduling_deis_multistep.py +16 -9
  492. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  493. diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
  494. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  495. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  496. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  497. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
  498. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  499. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  500. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  501. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  502. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  503. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  504. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  505. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  506. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  507. diffusers/schedulers/scheduling_ipndm.py +2 -2
  508. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  509. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  510. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  511. diffusers/schedulers/scheduling_lcm.py +3 -3
  512. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  513. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  514. diffusers/schedulers/scheduling_pndm.py +4 -4
  515. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  516. diffusers/schedulers/scheduling_repaint.py +9 -9
  517. diffusers/schedulers/scheduling_sasolver.py +15 -15
  518. diffusers/schedulers/scheduling_scm.py +1 -2
  519. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  520. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  521. diffusers/schedulers/scheduling_tcd.py +3 -3
  522. diffusers/schedulers/scheduling_unclip.py +5 -5
  523. diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
  524. diffusers/schedulers/scheduling_utils.py +3 -3
  525. diffusers/schedulers/scheduling_utils_flax.py +2 -2
  526. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  527. diffusers/training_utils.py +91 -5
  528. diffusers/utils/__init__.py +15 -0
  529. diffusers/utils/accelerate_utils.py +1 -1
  530. diffusers/utils/constants.py +4 -0
  531. diffusers/utils/doc_utils.py +1 -1
  532. diffusers/utils/dummy_pt_objects.py +432 -0
  533. diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
  534. diffusers/utils/dynamic_modules_utils.py +85 -8
  535. diffusers/utils/export_utils.py +1 -1
  536. diffusers/utils/hub_utils.py +33 -17
  537. diffusers/utils/import_utils.py +151 -18
  538. diffusers/utils/logging.py +1 -1
  539. diffusers/utils/outputs.py +2 -1
  540. diffusers/utils/peft_utils.py +96 -10
  541. diffusers/utils/state_dict_utils.py +20 -3
  542. diffusers/utils/testing_utils.py +195 -17
  543. diffusers/utils/torch_utils.py +43 -5
  544. diffusers/video_processor.py +2 -2
  545. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
  546. diffusers-0.35.0.dist-info/RECORD +703 -0
  547. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
  548. diffusers-0.33.1.dist-info/RECORD +0 -608
  549. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
  550. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
  551. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
diffusers/modular_pipelines/flux/encoders.py
@@ -0,0 +1,412 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import html
+ from typing import List, Optional, Union
+
+ import regex as re
+ import torch
+ from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
+
+ from ...configuration_utils import FrozenDict
+ from ...image_processor import VaeImageProcessor
+ from ...loaders import FluxLoraLoaderMixin, TextualInversionLoaderMixin
+ from ...models import AutoencoderKL
+ from ...utils import USE_PEFT_BACKEND, is_ftfy_available, logging, scale_lora_layers, unscale_lora_layers
+ from ..modular_pipeline import ModularPipelineBlocks, PipelineState
+ from ..modular_pipeline_utils import ComponentSpec, ConfigSpec, InputParam, OutputParam
+ from .modular_pipeline import FluxModularPipeline
+
+
+ if is_ftfy_available():
+     import ftfy
+
+
+ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+ def basic_clean(text):
+     text = ftfy.fix_text(text)
+     text = html.unescape(html.unescape(text))
+     return text.strip()
+
+
+ def whitespace_clean(text):
+     text = re.sub(r"\s+", " ", text)
+     text = text.strip()
+     return text
+
+
+ def prompt_clean(text):
+     text = whitespace_clean(basic_clean(text))
+     return text
+
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
+ def retrieve_latents(
+     encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"
+ ):
+     if hasattr(encoder_output, "latent_dist") and sample_mode == "sample":
+         return encoder_output.latent_dist.sample(generator)
+     elif hasattr(encoder_output, "latent_dist") and sample_mode == "argmax":
+         return encoder_output.latent_dist.mode()
+     elif hasattr(encoder_output, "latents"):
+         return encoder_output.latents
+     else:
+         raise AttributeError("Could not access latents of provided encoder_output")
+
+
+ class FluxVaeEncoderStep(ModularPipelineBlocks):
+     model_name = "flux"
+
+     @property
+     def description(self) -> str:
+         return "Vae Encoder step that encode the input image into a latent representation"
+
+     @property
+     def expected_components(self) -> List[ComponentSpec]:
+         return [
+             ComponentSpec("vae", AutoencoderKL),
+             ComponentSpec(
+                 "image_processor",
+                 VaeImageProcessor,
+                 config=FrozenDict({"vae_scale_factor": 16, "vae_latent_channels": 16}),
+                 default_creation_method="from_config",
+             ),
+         ]
+
+     @property
+     def inputs(self) -> List[InputParam]:
+         return [
+             InputParam("image", required=True),
+             InputParam("height"),
+             InputParam("width"),
+             InputParam("generator"),
+             InputParam("dtype", type_hint=torch.dtype, description="Data type of model tensor inputs"),
+             InputParam(
+                 "preprocess_kwargs",
+                 type_hint=Optional[dict],
+                 description="A kwargs dictionary that if specified is passed along to the `ImageProcessor` as defined under `self.image_processor` in [diffusers.image_processor.VaeImageProcessor]",
+             ),
+         ]
+
+     @property
+     def intermediate_outputs(self) -> List[OutputParam]:
+         return [
+             OutputParam(
+                 "image_latents",
+                 type_hint=torch.Tensor,
+                 description="The latents representing the reference image for image-to-image/inpainting generation",
+             )
+         ]
+
+     @staticmethod
+     # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_inpaint.StableDiffusion3InpaintPipeline._encode_vae_image with self.vae->vae
+     def _encode_vae_image(vae, image: torch.Tensor, generator: torch.Generator):
+         if isinstance(generator, list):
+             image_latents = [
+                 retrieve_latents(vae.encode(image[i : i + 1]), generator=generator[i]) for i in range(image.shape[0])
+             ]
+             image_latents = torch.cat(image_latents, dim=0)
+         else:
+             image_latents = retrieve_latents(vae.encode(image), generator=generator)
+
+         image_latents = (image_latents - vae.config.shift_factor) * vae.config.scaling_factor
+
+         return image_latents
+
+     @torch.no_grad()
+     def __call__(self, components: FluxModularPipeline, state: PipelineState) -> PipelineState:
+         block_state = self.get_block_state(state)
+         block_state.preprocess_kwargs = block_state.preprocess_kwargs or {}
+         block_state.device = components._execution_device
+         block_state.dtype = block_state.dtype if block_state.dtype is not None else components.vae.dtype
+
+         block_state.image = components.image_processor.preprocess(
+             block_state.image, height=block_state.height, width=block_state.width, **block_state.preprocess_kwargs
+         )
+         block_state.image = block_state.image.to(device=block_state.device, dtype=block_state.dtype)
+
+         block_state.batch_size = block_state.image.shape[0]
+
+         # if generator is a list, make sure the length of it matches the length of images (both should be batch_size)
+         if isinstance(block_state.generator, list) and len(block_state.generator) != block_state.batch_size:
+             raise ValueError(
+                 f"You have passed a list of generators of length {len(block_state.generator)}, but requested an effective batch"
+                 f" size of {block_state.batch_size}. Make sure the batch size matches the length of the generators."
+             )
+
+         block_state.image_latents = self._encode_vae_image(
+             components.vae, image=block_state.image, generator=block_state.generator
+         )
+
+         self.set_block_state(state, block_state)
+
+         return components, state
+
+
+ class FluxTextEncoderStep(ModularPipelineBlocks):
+     model_name = "flux"
+
+     @property
+     def description(self) -> str:
+         return "Text Encoder step that generate text_embeddings to guide the video generation"
+
+     @property
+     def expected_components(self) -> List[ComponentSpec]:
+         return [
+             ComponentSpec("text_encoder", CLIPTextModel),
+             ComponentSpec("tokenizer", CLIPTokenizer),
+             ComponentSpec("text_encoder_2", T5EncoderModel),
+             ComponentSpec("tokenizer_2", T5TokenizerFast),
+         ]
+
+     @property
+     def expected_configs(self) -> List[ConfigSpec]:
+         return []
+
+     @property
+     def inputs(self) -> List[InputParam]:
+         return [
+             InputParam("prompt"),
+             InputParam("prompt_2"),
+             InputParam("joint_attention_kwargs"),
+         ]
+
+     @property
+     def intermediate_outputs(self) -> List[OutputParam]:
+         return [
+             OutputParam(
+                 "prompt_embeds",
+                 type_hint=torch.Tensor,
+                 description="text embeddings used to guide the image generation",
+             ),
+             OutputParam(
+                 "pooled_prompt_embeds",
+                 type_hint=torch.Tensor,
+                 description="pooled text embeddings used to guide the image generation",
+             ),
+             OutputParam(
+                 "text_ids",
+                 type_hint=torch.Tensor,
+                 description="ids from the text sequence for RoPE",
+             ),
+         ]
+
+     @staticmethod
+     def check_inputs(block_state):
+         for prompt in [block_state.prompt, block_state.prompt_2]:
+             if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+                 raise ValueError(f"`prompt` or `prompt_2` has to be of type `str` or `list` but is {type(prompt)}")
+
+     @staticmethod
+     def _get_t5_prompt_embeds(
+         components,
+         prompt: Union[str, List[str]],
+         num_images_per_prompt: int,
+         max_sequence_length: int,
+         device: torch.device,
+     ):
+         dtype = components.text_encoder_2.dtype
+
+         prompt = [prompt] if isinstance(prompt, str) else prompt
+         batch_size = len(prompt)
+
+         if isinstance(components, TextualInversionLoaderMixin):
+             prompt = components.maybe_convert_prompt(prompt, components.tokenizer_2)
+
+         text_inputs = components.tokenizer_2(
+             prompt,
+             padding="max_length",
+             max_length=max_sequence_length,
+             truncation=True,
+             return_length=False,
+             return_overflowing_tokens=False,
+             return_tensors="pt",
+         )
+         text_input_ids = text_inputs.input_ids
+
+         untruncated_ids = components.tokenizer_2(prompt, padding="longest", return_tensors="pt").input_ids
+         if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
+             removed_text = components.tokenizer_2.batch_decode(untruncated_ids[:, max_sequence_length - 1 : -1])
+             logger.warning(
+                 "The following part of your input was truncated because `max_sequence_length` is set to "
+                 f" {max_sequence_length} tokens: {removed_text}"
+             )
+
+         prompt_embeds = components.text_encoder_2(text_input_ids.to(device), output_hidden_states=False)[0]
+         prompt_embeds = prompt_embeds.to(dtype=dtype, device=device)
+         _, seq_len, _ = prompt_embeds.shape
+
+         # duplicate text embeddings and attention mask for each generation per prompt, using mps friendly method
+         prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
+         prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
+
+         return prompt_embeds
+
+     @staticmethod
+     def _get_clip_prompt_embeds(
+         components,
+         prompt: Union[str, List[str]],
+         num_images_per_prompt: int,
+         device: torch.device,
+     ):
+         prompt = [prompt] if isinstance(prompt, str) else prompt
+         batch_size = len(prompt)
+
+         if isinstance(components, TextualInversionLoaderMixin):
+             prompt = components.maybe_convert_prompt(prompt, components.tokenizer)
+
+         text_inputs = components.tokenizer(
+             prompt,
+             padding="max_length",
+             max_length=components.tokenizer.model_max_length,
+             truncation=True,
+             return_overflowing_tokens=False,
+             return_length=False,
+             return_tensors="pt",
+         )
+
+         text_input_ids = text_inputs.input_ids
+         tokenizer_max_length = components.tokenizer.model_max_length
+         untruncated_ids = components.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
+         if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
+             removed_text = components.tokenizer.batch_decode(untruncated_ids[:, tokenizer_max_length - 1 : -1])
+             logger.warning(
+                 "The following part of your input was truncated because CLIP can only handle sequences up to"
+                 f" {tokenizer_max_length} tokens: {removed_text}"
+             )
+         prompt_embeds = components.text_encoder(text_input_ids.to(device), output_hidden_states=False)
+
+         # Use pooled output of CLIPTextModel
+         prompt_embeds = prompt_embeds.pooler_output
+         prompt_embeds = prompt_embeds.to(dtype=components.text_encoder.dtype, device=device)
+
+         # duplicate text embeddings for each generation per prompt, using mps friendly method
+         prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt)
+         prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, -1)
+
+         return prompt_embeds
+
+     @staticmethod
+     def encode_prompt(
+         components,
+         prompt: Union[str, List[str]],
+         prompt_2: Union[str, List[str]],
+         device: Optional[torch.device] = None,
+         num_images_per_prompt: int = 1,
+         prompt_embeds: Optional[torch.FloatTensor] = None,
+         pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+         max_sequence_length: int = 512,
+         lora_scale: Optional[float] = None,
+     ):
+         r"""
+         Encodes the prompt into text encoder hidden states.
+
+         Args:
+             prompt (`str` or `List[str]`, *optional*):
+                 prompt to be encoded
+             prompt_2 (`str` or `List[str]`, *optional*):
+                 The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+                 used in all text-encoders
+             device: (`torch.device`):
+                 torch device
+             num_images_per_prompt (`int`):
+                 number of images that should be generated per prompt
+             prompt_embeds (`torch.FloatTensor`, *optional*):
+                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+                 provided, text embeddings will be generated from `prompt` input argument.
+             pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                 Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+                 If not provided, pooled text embeddings will be generated from `prompt` input argument.
+             lora_scale (`float`, *optional*):
+                 A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
+         """
+         device = device or components._execution_device
+
+         # set lora scale so that monkey patched LoRA
+         # function of text encoder can correctly access it
+         if lora_scale is not None and isinstance(components, FluxLoraLoaderMixin):
+             components._lora_scale = lora_scale
+
+             # dynamically adjust the LoRA scale
+             if components.text_encoder is not None and USE_PEFT_BACKEND:
+                 scale_lora_layers(components.text_encoder, lora_scale)
+             if components.text_encoder_2 is not None and USE_PEFT_BACKEND:
+                 scale_lora_layers(components.text_encoder_2, lora_scale)
+
+         prompt = [prompt] if isinstance(prompt, str) else prompt
+
+         if prompt_embeds is None:
+             prompt_2 = prompt_2 or prompt
+             prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2
+
+             # We only use the pooled prompt output from the CLIPTextModel
+             pooled_prompt_embeds = FluxTextEncoderStep._get_clip_prompt_embeds(
+                 components,
+                 prompt=prompt,
+                 device=device,
+                 num_images_per_prompt=num_images_per_prompt,
+             )
+             prompt_embeds = FluxTextEncoderStep._get_t5_prompt_embeds(
+                 components,
+                 prompt=prompt_2,
+                 num_images_per_prompt=num_images_per_prompt,
+                 max_sequence_length=max_sequence_length,
+                 device=device,
+             )
+
+         if components.text_encoder is not None:
+             if isinstance(components, FluxLoraLoaderMixin) and USE_PEFT_BACKEND:
+                 # Retrieve the original scale by scaling back the LoRA layers
+                 unscale_lora_layers(components.text_encoder, lora_scale)
+
+         if components.text_encoder_2 is not None:
+             if isinstance(components, FluxLoraLoaderMixin) and USE_PEFT_BACKEND:
+                 # Retrieve the original scale by scaling back the LoRA layers
+                 unscale_lora_layers(components.text_encoder_2, lora_scale)
+
+         dtype = components.text_encoder.dtype if components.text_encoder is not None else torch.bfloat16
+         text_ids = torch.zeros(prompt_embeds.shape[1], 3).to(device=device, dtype=dtype)
+
+         return prompt_embeds, pooled_prompt_embeds, text_ids
+
+     @torch.no_grad()
+     def __call__(self, components: FluxModularPipeline, state: PipelineState) -> PipelineState:
+         # Get inputs and intermediates
+         block_state = self.get_block_state(state)
+         self.check_inputs(block_state)
+
+         block_state.device = components._execution_device
+
+         # Encode input prompt
+         block_state.text_encoder_lora_scale = (
+             block_state.joint_attention_kwargs.get("scale", None)
+             if block_state.joint_attention_kwargs is not None
+             else None
+         )
+         (block_state.prompt_embeds, block_state.pooled_prompt_embeds, block_state.text_ids) = self.encode_prompt(
+             components,
+             prompt=block_state.prompt,
+             prompt_2=None,
+             prompt_embeds=None,
+             pooled_prompt_embeds=None,
+             device=block_state.device,
+             num_images_per_prompt=1,  # TODO: hardcoded for now.
+             lora_scale=block_state.text_encoder_lora_scale,
+         )
+
+         # Add outputs
+         self.set_block_state(state, block_state)
+         return components, state
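
To make the three branches of `retrieve_latents` above concrete, here is a minimal, self-contained sketch. `FakePosterior` and `FakeEncoderOutput` are hypothetical stand-ins for the object returned by `AutoencoderKL.encode`; only `retrieve_latents` itself comes from the file above.

    import torch

    from diffusers.modular_pipelines.flux.encoders import retrieve_latents

    class FakePosterior:
        # Hypothetical stand-in for the VAE's latent distribution object.
        def __init__(self, mean):
            self.mean = mean

        def sample(self, generator=None):
            # sample_mode="sample": stochastic draw around the posterior mean
            return self.mean + torch.randn(self.mean.shape, generator=generator)

        def mode(self):
            # sample_mode="argmax": deterministic posterior mode
            return self.mean

    class FakeEncoderOutput:
        def __init__(self, mean):
            self.latent_dist = FakePosterior(mean)

    out = FakeEncoderOutput(torch.zeros(1, 16, 64, 64))
    print(retrieve_latents(out, sample_mode="sample").shape)   # torch.Size([1, 16, 64, 64])
    print(retrieve_latents(out, sample_mode="argmax").shape)   # torch.Size([1, 16, 64, 64])

`_encode_vae_image` then shifts and scales the result with the VAE's `shift_factor`/`scaling_factor` config before it is written back to the pipeline state as `image_latents`.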
diffusers/modular_pipelines/flux/modular_blocks.py
@@ -0,0 +1,181 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from ...utils import logging
+ from ..modular_pipeline import AutoPipelineBlocks, SequentialPipelineBlocks
+ from ..modular_pipeline_utils import InsertableDict
+ from .before_denoise import (
+     FluxImg2ImgPrepareLatentsStep,
+     FluxImg2ImgSetTimestepsStep,
+     FluxInputStep,
+     FluxPrepareLatentsStep,
+     FluxSetTimestepsStep,
+ )
+ from .decoders import FluxDecodeStep
+ from .denoise import FluxDenoiseStep
+ from .encoders import FluxTextEncoderStep, FluxVaeEncoderStep
+
+
+ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+ # vae encoder (run before before_denoise)
+ class FluxAutoVaeEncoderStep(AutoPipelineBlocks):
+     block_classes = [FluxVaeEncoderStep]
+     block_names = ["img2img"]
+     block_trigger_inputs = ["image"]
+
+     @property
+     def description(self):
+         return (
+             "Vae encoder step that encode the image inputs into their latent representations.\n"
+             + "This is an auto pipeline block that works for img2img tasks.\n"
+             + " - `FluxVaeEncoderStep` (img2img) is used when only `image` is provided."
+             + " - if `image` is not provided, step will be skipped."
+         )
+
+
+ # before_denoise: text2img, img2img
+ class FluxBeforeDenoiseStep(SequentialPipelineBlocks):
+     block_classes = [
+         FluxInputStep,
+         FluxPrepareLatentsStep,
+         FluxSetTimestepsStep,
+     ]
+     block_names = ["input", "prepare_latents", "set_timesteps"]
+
+     @property
+     def description(self):
+         return (
+             "Before denoise step that prepare the inputs for the denoise step.\n"
+             + "This is a sequential pipeline blocks:\n"
+             + " - `FluxInputStep` is used to adjust the batch size of the model inputs\n"
+             + " - `FluxPrepareLatentsStep` is used to prepare the latents\n"
+             + " - `FluxSetTimestepsStep` is used to set the timesteps\n"
+         )
+
+
+ # before_denoise: img2img
+ class FluxImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
+     block_classes = [FluxInputStep, FluxImg2ImgSetTimestepsStep, FluxImg2ImgPrepareLatentsStep]
+     block_names = ["input", "set_timesteps", "prepare_latents"]
+
+     @property
+     def description(self):
+         return (
+             "Before denoise step that prepare the inputs for the denoise step for img2img task.\n"
+             + "This is a sequential pipeline blocks:\n"
+             + " - `FluxInputStep` is used to adjust the batch size of the model inputs\n"
+             + " - `FluxImg2ImgSetTimestepsStep` is used to set the timesteps\n"
+             + " - `FluxImg2ImgPrepareLatentsStep` is used to prepare the latents\n"
+         )
+
+
+ # before_denoise: all task (text2img, img2img)
+ class FluxAutoBeforeDenoiseStep(AutoPipelineBlocks):
+     block_classes = [FluxBeforeDenoiseStep, FluxImg2ImgBeforeDenoiseStep]
+     block_names = ["text2image", "img2img"]
+     block_trigger_inputs = [None, "image_latents"]
+
+     @property
+     def description(self):
+         return (
+             "Before denoise step that prepare the inputs for the denoise step.\n"
+             + "This is an auto pipeline block that works for text2image.\n"
+             + " - `FluxBeforeDenoiseStep` (text2image) is used.\n"
+             + " - `FluxImg2ImgBeforeDenoiseStep` (img2img) is used when only `image_latents` is provided.\n"
+         )
+
+
+ # denoise: text2image
+ class FluxAutoDenoiseStep(AutoPipelineBlocks):
+     block_classes = [FluxDenoiseStep]
+     block_names = ["denoise"]
+     block_trigger_inputs = [None]
+
+     @property
+     def description(self) -> str:
+         return (
+             "Denoise step that iteratively denoise the latents. "
+             "This is a auto pipeline block that works for text2image and img2img tasks."
+             " - `FluxDenoiseStep` (denoise) for text2image and img2img tasks."
+         )
+
+
+ # decode: all task (text2img, img2img, inpainting)
+ class FluxAutoDecodeStep(AutoPipelineBlocks):
+     block_classes = [FluxDecodeStep]
+     block_names = ["non-inpaint"]
+     block_trigger_inputs = [None]
+
+     @property
+     def description(self):
+         return "Decode step that decode the denoised latents into image outputs.\n - `FluxDecodeStep`"
+
+
+ # text2image
+ class FluxAutoBlocks(SequentialPipelineBlocks):
+     block_classes = [
+         FluxTextEncoderStep,
+         FluxAutoVaeEncoderStep,
+         FluxAutoBeforeDenoiseStep,
+         FluxAutoDenoiseStep,
+         FluxAutoDecodeStep,
+     ]
+     block_names = ["text_encoder", "image_encoder", "before_denoise", "denoise", "decoder"]
+
+     @property
+     def description(self):
+         return (
+             "Auto Modular pipeline for text-to-image and image-to-image using Flux.\n"
+             + "- for text-to-image generation, all you need to provide is `prompt`\n"
+             + "- for image-to-image generation, you need to provide either `image` or `image_latents`"
+         )
+
+
+ TEXT2IMAGE_BLOCKS = InsertableDict(
+     [
+         ("text_encoder", FluxTextEncoderStep),
+         ("input", FluxInputStep),
+         ("set_timesteps", FluxSetTimestepsStep),
+         ("prepare_latents", FluxPrepareLatentsStep),
+         ("denoise", FluxDenoiseStep),
+         ("decode", FluxDecodeStep),
+     ]
+ )
+
+ IMAGE2IMAGE_BLOCKS = InsertableDict(
+     [
+         ("text_encoder", FluxTextEncoderStep),
+         ("image_encoder", FluxVaeEncoderStep),
+         ("input", FluxInputStep),
+         ("set_timesteps", FluxImg2ImgSetTimestepsStep),
+         ("prepare_latents", FluxImg2ImgPrepareLatentsStep),
+         ("denoise", FluxDenoiseStep),
+         ("decode", FluxDecodeStep),
+     ]
+ )
+
+ AUTO_BLOCKS = InsertableDict(
+     [
+         ("text_encoder", FluxTextEncoderStep),
+         ("image_encoder", FluxAutoVaeEncoderStep),
+         ("before_denoise", FluxAutoBeforeDenoiseStep),
+         ("denoise", FluxAutoDenoiseStep),
+         ("decode", FluxAutoDecodeStep),
+     ]
+ )
+
+
+ ALL_BLOCKS = {"text2image": TEXT2IMAGE_BLOCKS, "img2img": IMAGE2IMAGE_BLOCKS, "auto": AUTO_BLOCKS}
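
The `InsertableDict` presets above are meant to be composed into a runnable pipeline. Below is a hedged sketch of that flow, assuming the block-composition API from the Modular Diffusers documentation (`SequentialPipelineBlocks.from_blocks_dict`, `init_pipeline`, `load_default_components`); the exact call signatures and the repo id are assumptions, not guaranteed by this diff.

    import torch

    from diffusers.modular_pipelines import SequentialPipelineBlocks
    from diffusers.modular_pipelines.flux.modular_blocks import TEXT2IMAGE_BLOCKS

    # Compose the ordered block preset into one sequential pipeline definition.
    blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)

    # Bind the definition to concrete components from a model repo, then load them
    # (assumed API and repo id, see note above).
    pipe = blocks.init_pipeline("black-forest-labs/FLUX.1-dev")
    pipe.load_default_components(torch_dtype=torch.bfloat16)
    pipe.to("cuda")

    image = pipe(prompt="a tiny astronaut hatching from an egg", output="images")[0]
    image.save("flux_modular_t2i.png")

Swapping `TEXT2IMAGE_BLOCKS` for `IMAGE2IMAGE_BLOCKS` reuses the same flow for img2img, while `AUTO_BLOCKS`/`FluxAutoBlocks` dispatches between the two based on whether `image`/`image_latents` is present in the state.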
diffusers/modular_pipelines/flux/modular_pipeline.py
@@ -0,0 +1,59 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ from ...loaders import FluxLoraLoaderMixin, TextualInversionLoaderMixin
+ from ...utils import logging
+ from ..modular_pipeline import ModularPipeline
+
+
+ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+ class FluxModularPipeline(ModularPipeline, FluxLoraLoaderMixin, TextualInversionLoaderMixin):
+     """
+     A ModularPipeline for Flux.
+
+     <Tip warning={true}>
+
+     This is an experimental feature and is likely to change in the future.
+
+     </Tip>
+     """
+
+     @property
+     def default_height(self):
+         return self.default_sample_size * self.vae_scale_factor
+
+     @property
+     def default_width(self):
+         return self.default_sample_size * self.vae_scale_factor
+
+     @property
+     def default_sample_size(self):
+         return 128
+
+     @property
+     def vae_scale_factor(self):
+         vae_scale_factor = 8
+         if getattr(self, "vae", None) is not None:
+             vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+         return vae_scale_factor
+
+     @property
+     def num_channels_latents(self):
+         num_channels_latents = 16
+         if getattr(self, "transformer", None):
+             num_channels_latents = self.transformer.config.in_channels // 4
+         return num_channels_latents
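
For the stock Flux checkpoints these properties resolve to a 1024×1024 default resolution and 16 latent channels. A quick check of the arithmetic, assuming the standard Flux configs (a VAE with 4 entries in `block_out_channels`, a transformer with `in_channels=64`):

    # vae_scale_factor: 4 entries in block_out_channels gives 2 ** (4 - 1) = 8.
    vae_scale_factor = 2 ** (4 - 1)
    default_sample_size = 128
    print(default_sample_size * vae_scale_factor)  # 1024 -> default_height / default_width

    # num_channels_latents: the transformer consumes 2x2-packed latent patches,
    # so config.in_channels = 64 maps back to 64 // 4 = 16 latent channels.
    print(64 // 4)  # 16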