diffusers 0.31.0__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff compares the contents of two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the versions exactly as they appear in their respective public registries.
Files changed (214)
  1. diffusers/__init__.py +66 -5
  2. diffusers/callbacks.py +56 -3
  3. diffusers/configuration_utils.py +1 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/image_processor.py +25 -17
  6. diffusers/loaders/__init__.py +22 -3
  7. diffusers/loaders/ip_adapter.py +538 -15
  8. diffusers/loaders/lora_base.py +124 -118
  9. diffusers/loaders/lora_conversion_utils.py +318 -3
  10. diffusers/loaders/lora_pipeline.py +1688 -368
  11. diffusers/loaders/peft.py +379 -0
  12. diffusers/loaders/single_file_model.py +71 -4
  13. diffusers/loaders/single_file_utils.py +519 -9
  14. diffusers/loaders/textual_inversion.py +3 -3
  15. diffusers/loaders/transformer_flux.py +181 -0
  16. diffusers/loaders/transformer_sd3.py +89 -0
  17. diffusers/loaders/unet.py +17 -4
  18. diffusers/models/__init__.py +47 -14
  19. diffusers/models/activations.py +22 -9
  20. diffusers/models/attention.py +13 -4
  21. diffusers/models/attention_flax.py +1 -1
  22. diffusers/models/attention_processor.py +2059 -281
  23. diffusers/models/autoencoders/__init__.py +5 -0
  24. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  25. diffusers/models/autoencoders/autoencoder_kl.py +2 -1
  26. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  27. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +36 -27
  28. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  29. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  30. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  31. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
  32. diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
  33. diffusers/models/autoencoders/vae.py +18 -5
  34. diffusers/models/controlnet.py +47 -802
  35. diffusers/models/controlnet_flux.py +29 -495
  36. diffusers/models/controlnet_sd3.py +25 -379
  37. diffusers/models/controlnet_sparsectrl.py +46 -718
  38. diffusers/models/controlnets/__init__.py +23 -0
  39. diffusers/models/controlnets/controlnet.py +872 -0
  40. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
  41. diffusers/models/controlnets/controlnet_flux.py +536 -0
  42. diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
  43. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  44. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  45. diffusers/models/controlnets/controlnet_union.py +832 -0
  46. diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
  47. diffusers/models/controlnets/multicontrolnet.py +183 -0
  48. diffusers/models/embeddings.py +838 -43
  49. diffusers/models/model_loading_utils.py +88 -6
  50. diffusers/models/modeling_flax_utils.py +1 -1
  51. diffusers/models/modeling_utils.py +74 -28
  52. diffusers/models/normalization.py +78 -13
  53. diffusers/models/transformers/__init__.py +5 -0
  54. diffusers/models/transformers/auraflow_transformer_2d.py +2 -2
  55. diffusers/models/transformers/cogvideox_transformer_3d.py +46 -11
  56. diffusers/models/transformers/dit_transformer_2d.py +1 -1
  57. diffusers/models/transformers/latte_transformer_3d.py +4 -4
  58. diffusers/models/transformers/pixart_transformer_2d.py +1 -1
  59. diffusers/models/transformers/sana_transformer.py +488 -0
  60. diffusers/models/transformers/stable_audio_transformer.py +1 -1
  61. diffusers/models/transformers/transformer_2d.py +1 -1
  62. diffusers/models/transformers/transformer_allegro.py +422 -0
  63. diffusers/models/transformers/transformer_cogview3plus.py +1 -1
  64. diffusers/models/transformers/transformer_flux.py +30 -9
  65. diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
  66. diffusers/models/transformers/transformer_ltx.py +469 -0
  67. diffusers/models/transformers/transformer_mochi.py +499 -0
  68. diffusers/models/transformers/transformer_sd3.py +105 -17
  69. diffusers/models/transformers/transformer_temporal.py +1 -1
  70. diffusers/models/unets/unet_1d_blocks.py +1 -1
  71. diffusers/models/unets/unet_2d.py +8 -1
  72. diffusers/models/unets/unet_2d_blocks.py +88 -21
  73. diffusers/models/unets/unet_2d_condition.py +1 -1
  74. diffusers/models/unets/unet_3d_blocks.py +9 -7
  75. diffusers/models/unets/unet_motion_model.py +5 -5
  76. diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
  77. diffusers/models/unets/unet_stable_cascade.py +2 -2
  78. diffusers/models/unets/uvit_2d.py +1 -1
  79. diffusers/models/upsampling.py +8 -0
  80. diffusers/pipelines/__init__.py +34 -0
  81. diffusers/pipelines/allegro/__init__.py +48 -0
  82. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  83. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  84. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +8 -2
  85. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1 -1
  86. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +0 -6
  87. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +8 -8
  88. diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
  89. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -8
  90. diffusers/pipelines/auto_pipeline.py +53 -6
  91. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  92. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +50 -22
  93. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +51 -20
  94. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +69 -21
  95. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +47 -21
  96. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +1 -1
  97. diffusers/pipelines/controlnet/__init__.py +86 -80
  98. diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
  99. diffusers/pipelines/controlnet/pipeline_controlnet.py +11 -2
  100. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +1 -2
  101. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +1 -2
  102. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +1 -2
  103. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +3 -3
  104. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +1 -3
  105. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  106. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  107. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  108. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +5 -1
  109. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +53 -19
  110. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  111. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +31 -8
  112. diffusers/pipelines/flux/__init__.py +13 -1
  113. diffusers/pipelines/flux/modeling_flux.py +47 -0
  114. diffusers/pipelines/flux/pipeline_flux.py +204 -29
  115. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  116. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  117. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  118. diffusers/pipelines/flux/pipeline_flux_controlnet.py +49 -27
  119. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +40 -30
  120. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +78 -56
  121. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  122. diffusers/pipelines/flux/pipeline_flux_img2img.py +33 -27
  123. diffusers/pipelines/flux/pipeline_flux_inpaint.py +36 -29
  124. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  125. diffusers/pipelines/flux/pipeline_output.py +16 -0
  126. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  127. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  128. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  129. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +5 -1
  130. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
  131. diffusers/pipelines/kolors/text_encoder.py +2 -2
  132. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  133. diffusers/pipelines/ltx/__init__.py +50 -0
  134. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  135. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  136. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  137. diffusers/pipelines/lumina/pipeline_lumina.py +1 -8
  138. diffusers/pipelines/mochi/__init__.py +48 -0
  139. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  140. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  141. diffusers/pipelines/pag/__init__.py +7 -0
  142. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -2
  143. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1 -2
  144. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1 -3
  145. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1 -3
  146. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +5 -1
  147. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +6 -13
  148. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  149. diffusers/pipelines/pag/pipeline_pag_sd_3.py +6 -6
  150. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  151. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +3 -0
  152. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  153. diffusers/pipelines/pipeline_flax_utils.py +1 -1
  154. diffusers/pipelines/pipeline_loading_utils.py +25 -4
  155. diffusers/pipelines/pipeline_utils.py +35 -6
  156. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +6 -13
  157. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +6 -13
  158. diffusers/pipelines/sana/__init__.py +47 -0
  159. diffusers/pipelines/sana/pipeline_output.py +21 -0
  160. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  161. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -3
  163. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +216 -20
  164. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +62 -9
  165. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +57 -8
  166. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
  167. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -8
  168. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -8
  169. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -8
  170. diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
  171. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  172. diffusers/quantizers/auto.py +14 -1
  173. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -1
  174. diffusers/quantizers/gguf/__init__.py +1 -0
  175. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  176. diffusers/quantizers/gguf/utils.py +456 -0
  177. diffusers/quantizers/quantization_config.py +280 -2
  178. diffusers/quantizers/torchao/__init__.py +15 -0
  179. diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
  180. diffusers/schedulers/scheduling_ddpm.py +2 -6
  181. diffusers/schedulers/scheduling_ddpm_parallel.py +2 -6
  182. diffusers/schedulers/scheduling_deis_multistep.py +28 -9
  183. diffusers/schedulers/scheduling_dpmsolver_multistep.py +35 -9
  184. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +35 -8
  185. diffusers/schedulers/scheduling_dpmsolver_sde.py +4 -4
  186. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +48 -10
  187. diffusers/schedulers/scheduling_euler_discrete.py +4 -4
  188. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
  189. diffusers/schedulers/scheduling_heun_discrete.py +4 -4
  190. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +4 -4
  191. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +4 -4
  192. diffusers/schedulers/scheduling_lcm.py +2 -6
  193. diffusers/schedulers/scheduling_lms_discrete.py +4 -4
  194. diffusers/schedulers/scheduling_repaint.py +1 -1
  195. diffusers/schedulers/scheduling_sasolver.py +28 -9
  196. diffusers/schedulers/scheduling_tcd.py +2 -6
  197. diffusers/schedulers/scheduling_unipc_multistep.py +53 -8
  198. diffusers/training_utils.py +16 -2
  199. diffusers/utils/__init__.py +5 -0
  200. diffusers/utils/constants.py +1 -0
  201. diffusers/utils/dummy_pt_objects.py +180 -0
  202. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  203. diffusers/utils/dynamic_modules_utils.py +3 -3
  204. diffusers/utils/hub_utils.py +31 -39
  205. diffusers/utils/import_utils.py +67 -0
  206. diffusers/utils/peft_utils.py +3 -0
  207. diffusers/utils/testing_utils.py +56 -1
  208. diffusers/utils/torch_utils.py +3 -0
  209. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/METADATA +69 -69
  210. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/RECORD +214 -162
  211. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
  212. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
  213. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
  214. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
diffusers/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.31.0"
+__version__ = "0.32.0"
 
 from typing import TYPE_CHECKING
 
@@ -31,7 +31,7 @@ _import_structure = {
     "loaders": ["FromOriginalModelMixin"],
     "models": [],
     "pipelines": [],
-    "quantizers.quantization_config": ["BitsAndBytesConfig"],
+    "quantizers.quantization_config": ["BitsAndBytesConfig", "GGUFQuantizationConfig", "TorchAoConfig"],
     "schedulers": [],
     "utils": [
         "OptionalDependencyNotAvailable",
@@ -77,10 +77,16 @@ except OptionalDependencyNotAvailable:
 else:
     _import_structure["models"].extend(
         [
+            "AllegroTransformer3DModel",
             "AsymmetricAutoencoderKL",
             "AuraFlowTransformer2DModel",
+            "AutoencoderDC",
             "AutoencoderKL",
+            "AutoencoderKLAllegro",
             "AutoencoderKLCogVideoX",
+            "AutoencoderKLHunyuanVideo",
+            "AutoencoderKLLTXVideo",
+            "AutoencoderKLMochi",
             "AutoencoderKLTemporalDecoder",
             "AutoencoderOobleck",
             "AutoencoderTiny",
@@ -88,6 +94,7 @@ else:
             "CogView3PlusTransformer2DModel",
             "ConsistencyDecoderVAE",
             "ControlNetModel",
+            "ControlNetUnionModel",
             "ControlNetXSAdapter",
             "DiTTransformer2DModel",
             "FluxControlNetModel",
@@ -96,15 +103,20 @@ else:
             "HunyuanDiT2DControlNetModel",
             "HunyuanDiT2DModel",
             "HunyuanDiT2DMultiControlNetModel",
+            "HunyuanVideoTransformer3DModel",
             "I2VGenXLUNet",
             "Kandinsky3UNet",
             "LatteTransformer3DModel",
+            "LTXVideoTransformer3DModel",
             "LuminaNextDiT2DModel",
+            "MochiTransformer3DModel",
             "ModelMixin",
             "MotionAdapter",
             "MultiAdapter",
+            "MultiControlNetModel",
             "PixArtTransformer2DModel",
             "PriorTransformer",
+            "SanaTransformer2DModel",
             "SD3ControlNetModel",
             "SD3MultiControlNetModel",
             "SD3Transformer2DModel",
@@ -237,6 +249,7 @@ except OptionalDependencyNotAvailable:
 else:
     _import_structure["pipelines"].extend(
         [
+            "AllegroPipeline",
             "AltDiffusionImg2ImgPipeline",
             "AltDiffusionPipeline",
             "AmusedImg2ImgPipeline",
@@ -263,15 +276,21 @@ else:
             "CogVideoXVideoToVideoPipeline",
             "CogView3PlusPipeline",
             "CycleDiffusionPipeline",
+            "FluxControlImg2ImgPipeline",
+            "FluxControlInpaintPipeline",
             "FluxControlNetImg2ImgPipeline",
             "FluxControlNetInpaintPipeline",
             "FluxControlNetPipeline",
+            "FluxControlPipeline",
+            "FluxFillPipeline",
             "FluxImg2ImgPipeline",
             "FluxInpaintPipeline",
             "FluxPipeline",
+            "FluxPriorReduxPipeline",
             "HunyuanDiTControlNetPipeline",
             "HunyuanDiTPAGPipeline",
             "HunyuanDiTPipeline",
+            "HunyuanVideoPipeline",
             "I2VGenXLPipeline",
             "IFImg2ImgPipeline",
             "IFImg2ImgSuperResolutionPipeline",
@@ -305,15 +324,21 @@ else:
             "LDMTextToImagePipeline",
             "LEditsPPPipelineStableDiffusion",
             "LEditsPPPipelineStableDiffusionXL",
+            "LTXImageToVideoPipeline",
+            "LTXPipeline",
             "LuminaText2ImgPipeline",
             "MarigoldDepthPipeline",
             "MarigoldNormalsPipeline",
+            "MochiPipeline",
             "MusicLDMPipeline",
             "PaintByExamplePipeline",
             "PIAPipeline",
             "PixArtAlphaPipeline",
             "PixArtSigmaPAGPipeline",
             "PixArtSigmaPipeline",
+            "ReduxImageEncoder",
+            "SanaPAGPipeline",
+            "SanaPipeline",
             "SemanticStableDiffusionPipeline",
             "ShapEImg2ImgPipeline",
             "ShapEPipeline",
@@ -326,6 +351,8 @@ else:
             "StableDiffusion3ControlNetPipeline",
             "StableDiffusion3Img2ImgPipeline",
             "StableDiffusion3InpaintPipeline",
+            "StableDiffusion3PAGImg2ImgPipeline",
+            "StableDiffusion3PAGImg2ImgPipeline",
             "StableDiffusion3PAGPipeline",
             "StableDiffusion3Pipeline",
             "StableDiffusionAdapterPipeline",
@@ -349,6 +376,7 @@ else:
             "StableDiffusionLDM3DPipeline",
             "StableDiffusionModelEditingPipeline",
             "StableDiffusionPAGImg2ImgPipeline",
+            "StableDiffusionPAGInpaintPipeline",
             "StableDiffusionPAGPipeline",
             "StableDiffusionPanoramaPipeline",
             "StableDiffusionParadigmsPipeline",
@@ -363,6 +391,9 @@ else:
             "StableDiffusionXLControlNetPAGImg2ImgPipeline",
             "StableDiffusionXLControlNetPAGPipeline",
             "StableDiffusionXLControlNetPipeline",
+            "StableDiffusionXLControlNetUnionImg2ImgPipeline",
+            "StableDiffusionXLControlNetUnionInpaintPipeline",
+            "StableDiffusionXLControlNetUnionPipeline",
             "StableDiffusionXLControlNetXSPipeline",
             "StableDiffusionXLImg2ImgPipeline",
             "StableDiffusionXLInpaintPipeline",
@@ -481,7 +512,7 @@ except OptionalDependencyNotAvailable:
 
 
 else:
-    _import_structure["models.controlnet_flax"] = ["FlaxControlNetModel"]
+    _import_structure["models.controlnets.controlnet_flax"] = ["FlaxControlNetModel"]
     _import_structure["models.modeling_flax_utils"] = ["FlaxModelMixin"]
     _import_structure["models.unets.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
     _import_structure["models.vae_flax"] = ["FlaxAutoencoderKL"]
@@ -539,7 +570,7 @@ else:
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     from .configuration_utils import ConfigMixin
-    from .quantizers.quantization_config import BitsAndBytesConfig
+    from .quantizers.quantization_config import BitsAndBytesConfig, GGUFQuantizationConfig, TorchAoConfig
 
     try:
         if not is_onnx_available():
@@ -556,10 +587,16 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .utils.dummy_pt_objects import *  # noqa F403
     else:
         from .models import (
+            AllegroTransformer3DModel,
             AsymmetricAutoencoderKL,
             AuraFlowTransformer2DModel,
+            AutoencoderDC,
             AutoencoderKL,
+            AutoencoderKLAllegro,
             AutoencoderKLCogVideoX,
+            AutoencoderKLHunyuanVideo,
+            AutoencoderKLLTXVideo,
+            AutoencoderKLMochi,
             AutoencoderKLTemporalDecoder,
             AutoencoderOobleck,
             AutoencoderTiny,
@@ -567,6 +604,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             CogView3PlusTransformer2DModel,
             ConsistencyDecoderVAE,
             ControlNetModel,
+            ControlNetUnionModel,
             ControlNetXSAdapter,
             DiTTransformer2DModel,
             FluxControlNetModel,
@@ -575,15 +613,20 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             HunyuanDiT2DControlNetModel,
             HunyuanDiT2DModel,
             HunyuanDiT2DMultiControlNetModel,
+            HunyuanVideoTransformer3DModel,
             I2VGenXLUNet,
             Kandinsky3UNet,
             LatteTransformer3DModel,
+            LTXVideoTransformer3DModel,
             LuminaNextDiT2DModel,
+            MochiTransformer3DModel,
             ModelMixin,
             MotionAdapter,
             MultiAdapter,
+            MultiControlNetModel,
             PixArtTransformer2DModel,
             PriorTransformer,
+            SanaTransformer2DModel,
             SD3ControlNetModel,
             SD3MultiControlNetModel,
             SD3Transformer2DModel,
@@ -697,6 +740,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .utils.dummy_torch_and_transformers_objects import *  # noqa F403
     else:
         from .pipelines import (
+            AllegroPipeline,
             AltDiffusionImg2ImgPipeline,
             AltDiffusionPipeline,
             AmusedImg2ImgPipeline,
@@ -721,15 +765,21 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             CogVideoXVideoToVideoPipeline,
             CogView3PlusPipeline,
             CycleDiffusionPipeline,
+            FluxControlImg2ImgPipeline,
+            FluxControlInpaintPipeline,
             FluxControlNetImg2ImgPipeline,
             FluxControlNetInpaintPipeline,
             FluxControlNetPipeline,
+            FluxControlPipeline,
+            FluxFillPipeline,
             FluxImg2ImgPipeline,
             FluxInpaintPipeline,
             FluxPipeline,
+            FluxPriorReduxPipeline,
             HunyuanDiTControlNetPipeline,
             HunyuanDiTPAGPipeline,
             HunyuanDiTPipeline,
+            HunyuanVideoPipeline,
             I2VGenXLPipeline,
             IFImg2ImgPipeline,
             IFImg2ImgSuperResolutionPipeline,
@@ -763,15 +813,21 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             LDMTextToImagePipeline,
             LEditsPPPipelineStableDiffusion,
             LEditsPPPipelineStableDiffusionXL,
+            LTXImageToVideoPipeline,
+            LTXPipeline,
             LuminaText2ImgPipeline,
             MarigoldDepthPipeline,
             MarigoldNormalsPipeline,
+            MochiPipeline,
             MusicLDMPipeline,
             PaintByExamplePipeline,
             PIAPipeline,
             PixArtAlphaPipeline,
             PixArtSigmaPAGPipeline,
             PixArtSigmaPipeline,
+            ReduxImageEncoder,
+            SanaPAGPipeline,
+            SanaPipeline,
             SemanticStableDiffusionPipeline,
             ShapEImg2ImgPipeline,
             ShapEPipeline,
@@ -783,6 +839,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusion3ControlNetPipeline,
             StableDiffusion3Img2ImgPipeline,
             StableDiffusion3InpaintPipeline,
+            StableDiffusion3PAGImg2ImgPipeline,
             StableDiffusion3PAGPipeline,
             StableDiffusion3Pipeline,
             StableDiffusionAdapterPipeline,
@@ -806,6 +863,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionLDM3DPipeline,
             StableDiffusionModelEditingPipeline,
             StableDiffusionPAGImg2ImgPipeline,
+            StableDiffusionPAGInpaintPipeline,
             StableDiffusionPAGPipeline,
             StableDiffusionPanoramaPipeline,
             StableDiffusionParadigmsPipeline,
@@ -820,6 +878,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionXLControlNetPAGImg2ImgPipeline,
             StableDiffusionXLControlNetPAGPipeline,
             StableDiffusionXLControlNetPipeline,
+            StableDiffusionXLControlNetUnionImg2ImgPipeline,
+            StableDiffusionXLControlNetUnionInpaintPipeline,
+            StableDiffusionXLControlNetUnionPipeline,
             StableDiffusionXLControlNetXSPipeline,
             StableDiffusionXLImg2ImgPipeline,
             StableDiffusionXLInpaintPipeline,
@@ -902,7 +963,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from .utils.dummy_flax_objects import *  # noqa F403
     else:
-        from .models.controlnet_flax import FlaxControlNetModel
+        from .models.controlnets.controlnet_flax import FlaxControlNetModel
         from .models.modeling_flax_utils import FlaxModelMixin
         from .models.unets.unet_2d_condition_flax import FlaxUNet2DConditionModel
         from .models.vae_flax import FlaxAutoencoderKL
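The new top-level exports track this release's headline features: two additional quantization backends (GGUF and torchao) alongside bitsandbytes, plus model and pipeline classes for the Allegro, HunyuanVideo, LTX-Video, Mochi, and Sana families. As a minimal sketch of how the new quantization configs are meant to be used, following the 0.32 documentation (the GGUF checkpoint URL is illustrative, not part of this diff):

    import torch
    from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig

    # Load a GGUF-quantized transformer; weights stay in their quantized layout
    # and are dequantized on the fly to compute_dtype during the forward pass.
    transformer = FluxTransformer2DModel.from_single_file(
        "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf",
        quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
        torch_dtype=torch.bfloat16,
    )
    pipe = FluxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16
    )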
diffusers/callbacks.py CHANGED
@@ -97,13 +97,17 @@ class SDCFGCutoffCallback(PipelineCallback):
 
 class SDXLCFGCutoffCallback(PipelineCallback):
     """
-    Callback function for Stable Diffusion XL Pipelines. After certain number of steps (set by `cutoff_step_ratio` or
-    `cutoff_step_index`), this callback will disable the CFG.
+    Callback function for the base Stable Diffusion XL Pipelines. After certain number of steps (set by
+    `cutoff_step_ratio` or `cutoff_step_index`), this callback will disable the CFG.
 
     Note: This callback mutates the pipeline by changing the `_guidance_scale` attribute to 0.0 after the cutoff step.
     """
 
-    tensor_inputs = ["prompt_embeds", "add_text_embeds", "add_time_ids"]
+    tensor_inputs = [
+        "prompt_embeds",
+        "add_text_embeds",
+        "add_time_ids",
+    ]
 
     def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
         cutoff_step_ratio = self.config.cutoff_step_ratio
@@ -129,6 +133,55 @@ class SDXLCFGCutoffCallback(PipelineCallback):
             callback_kwargs[self.tensor_inputs[0]] = prompt_embeds
             callback_kwargs[self.tensor_inputs[1]] = add_text_embeds
             callback_kwargs[self.tensor_inputs[2]] = add_time_ids
+
+        return callback_kwargs
+
+
+class SDXLControlnetCFGCutoffCallback(PipelineCallback):
+    """
+    Callback function for the Controlnet Stable Diffusion XL Pipelines. After certain number of steps (set by
+    `cutoff_step_ratio` or `cutoff_step_index`), this callback will disable the CFG.
+
+    Note: This callback mutates the pipeline by changing the `_guidance_scale` attribute to 0.0 after the cutoff step.
+    """
+
+    tensor_inputs = [
+        "prompt_embeds",
+        "add_text_embeds",
+        "add_time_ids",
+        "image",
+    ]
+
+    def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
+        cutoff_step_ratio = self.config.cutoff_step_ratio
+        cutoff_step_index = self.config.cutoff_step_index
+
+        # Use cutoff_step_index if it's not None, otherwise use cutoff_step_ratio
+        cutoff_step = (
+            cutoff_step_index if cutoff_step_index is not None else int(pipeline.num_timesteps * cutoff_step_ratio)
+        )
+
+        if step_index == cutoff_step:
+            prompt_embeds = callback_kwargs[self.tensor_inputs[0]]
+            prompt_embeds = prompt_embeds[-1:]  # "-1" denotes the embeddings for conditional text tokens.
+
+            add_text_embeds = callback_kwargs[self.tensor_inputs[1]]
+            add_text_embeds = add_text_embeds[-1:]  # "-1" denotes the embeddings for conditional pooled text tokens
+
+            add_time_ids = callback_kwargs[self.tensor_inputs[2]]
+            add_time_ids = add_time_ids[-1:]  # "-1" denotes the embeddings for conditional added time vector
+
+            # For Controlnet
+            image = callback_kwargs[self.tensor_inputs[3]]
+            image = image[-1:]
+
+            pipeline._guidance_scale = 0.0
+
+            callback_kwargs[self.tensor_inputs[0]] = prompt_embeds
+            callback_kwargs[self.tensor_inputs[1]] = add_text_embeds
+            callback_kwargs[self.tensor_inputs[2]] = add_time_ids
+            callback_kwargs[self.tensor_inputs[3]] = image
+
         return callback_kwargs
 
 
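The new SDXLControlnetCFGCutoffCallback mirrors the existing SDXL cutoff callback but additionally trims the ControlNet `image` conditioning tensor when guidance is switched off. A minimal sketch of how these cutoff callbacks plug into a pipeline call (model id and prompt are illustrative):

    import torch
    from diffusers import StableDiffusionXLPipeline
    from diffusers.callbacks import SDXLCFGCutoffCallback

    # Disable classifier-free guidance after 40% of the denoising steps.
    callback = SDXLCFGCutoffCallback(cutoff_step_ratio=0.4)

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    ).to("cuda")
    image = pipe(
        "a photo of an astronaut riding a horse on mars",
        num_inference_steps=30,
        callback_on_step_end=callback,
        callback_on_step_end_tensor_inputs=callback.tensor_inputs,
    ).images[0]

For a ControlNet SDXL pipeline, SDXLControlnetCFGCutoffCallback would be swapped in so the image conditioning is trimmed along with the embeddings.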
diffusers/configuration_utils.py CHANGED
@@ -170,7 +170,7 @@ class ConfigMixin:
 
         if push_to_hub:
             commit_message = kwargs.pop("commit_message", None)
-            private = kwargs.pop("private", False)
+            private = kwargs.pop("private", None)
             create_pr = kwargs.pop("create_pr", False)
             token = kwargs.pop("token", None)
             repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
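With the default changed from `False` to `None`, repository visibility is no longer forced to public when the caller does not specify it; the Hub's own default (for example an organization-wide setting) applies instead. A hedged sketch of the affected call path (requires a logged-in Hub token; the directory name is illustrative):

    from diffusers import UNet2DModel

    unet = UNet2DModel()  # tiny randomly initialized model, just to demonstrate the kwarg
    # private=None (the new default) defers visibility to the Hub;
    # pass an explicit True/False to force it, exactly as before.
    unet.save_pretrained("my-unet", push_to_hub=True, private=True)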
diffusers/dependency_versions_table.py CHANGED
@@ -38,7 +38,7 @@ deps = {
     "regex": "regex!=2019.12.17",
     "requests": "requests",
     "tensorboard": "tensorboard",
-    "torch": "torch>=1.4,<2.5.0",
+    "torch": "torch>=1.4",
     "torchvision": "torchvision",
     "transformers": "transformers>=4.41.2",
     "urllib3": "urllib3<=2.0.0",
diffusers/image_processor.py CHANGED
@@ -236,7 +236,7 @@ class VaeImageProcessor(ConfigMixin):
             `np.ndarray` or `torch.Tensor`:
                 The denormalized image array.
         """
-        return (images / 2 + 0.5).clamp(0, 1)
+        return (images * 0.5 + 0.5).clamp(0, 1)
 
     @staticmethod
     def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
@@ -537,6 +537,26 @@ class VaeImageProcessor(ConfigMixin):
 
         return image
 
+    def _denormalize_conditionally(
+        self, images: torch.Tensor, do_denormalize: Optional[List[bool]] = None
+    ) -> torch.Tensor:
+        r"""
+        Denormalize a batch of images based on a condition list.
+
+        Args:
+            images (`torch.Tensor`):
+                The input image tensor.
+            do_denormalize (`Optional[List[bool]]`, *optional*, defaults to `None`):
+                A list of booleans indicating whether to denormalize each image in the batch. If `None`, will use the
+                value of `do_normalize` in the `VaeImageProcessor` config.
+        """
+        if do_denormalize is None:
+            return self.denormalize(images) if self.config.do_normalize else images
+
+        return torch.stack(
+            [self.denormalize(images[i]) if do_denormalize[i] else images[i] for i in range(images.shape[0])]
+        )
+
     def get_default_height_width(
         self,
         image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
@@ -752,12 +772,7 @@ class VaeImageProcessor(ConfigMixin):
         if output_type == "latent":
             return image
 
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
-        )
+        image = self._denormalize_conditionally(image, do_denormalize)
 
         if output_type == "pt":
             return image
@@ -795,13 +810,11 @@ class VaeImageProcessor(ConfigMixin):
                 The final image with the overlay applied.
         """
 
-        width, height = image.width, image.height
-
-        init_image = self.resize(init_image, width=width, height=height)
-        mask = self.resize(mask, width=width, height=height)
+        width, height = init_image.width, init_image.height
 
         init_image_masked = PIL.Image.new("RGBa", (width, height))
         init_image_masked.paste(init_image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(mask.convert("L")))
+
         init_image_masked = init_image_masked.convert("RGBA")
 
         if crop_coords is not None:
@@ -968,12 +981,7 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
             deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False)
             output_type = "np"
 
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
-        )
+        image = self._denormalize_conditionally(image, do_denormalize)
 
         image = self.pt_to_numpy(image)
 
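`denormalize` maps decoder output from [-1, 1] back to [0, 1]; `images / 2 + 0.5` and `images * 0.5 + 0.5` are algebraically identical, the multiply form simply avoids a division. The new `_denormalize_conditionally` helper also adds a fast path: when no per-image flags are passed, the whole batch is handled with a single vectorized op (or left untouched) instead of building a boolean list and stacking image by image. A standalone re-implementation of that logic for illustration (not the library class itself):

    from typing import List, Optional

    import torch


    def denormalize(images: torch.Tensor) -> torch.Tensor:
        # Map [-1, 1] -> [0, 1]; identical to the old images / 2 + 0.5.
        return (images * 0.5 + 0.5).clamp(0, 1)


    def denormalize_conditionally(
        images: torch.Tensor,
        do_denormalize: Optional[List[bool]] = None,
        do_normalize_config: bool = True,  # stands in for self.config.do_normalize
    ) -> torch.Tensor:
        if do_denormalize is None:
            # Fast path: one vectorized op (or a no-op) for the whole batch.
            return denormalize(images) if do_normalize_config else images
        # Mixed batch: denormalize only the flagged images.
        return torch.stack(
            [denormalize(img) if flag else img for img, flag in zip(images, do_denormalize)]
        )


    batch = torch.rand(2, 3, 8, 8) * 2 - 1  # fake decoder output in [-1, 1]
    assert torch.allclose(denormalize_conditionally(batch), batch * 0.5 + 0.5)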
diffusers/loaders/__init__.py CHANGED
@@ -55,7 +55,8 @@ _import_structure = {}
 
 if is_torch_available():
     _import_structure["single_file_model"] = ["FromOriginalModelMixin"]
-
+    _import_structure["transformer_flux"] = ["FluxTransformer2DLoadersMixin"]
+    _import_structure["transformer_sd3"] = ["SD3Transformer2DLoadersMixin"]
     _import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
     _import_structure["utils"] = ["AttnProcsLayers"]
     if is_transformers_available():
@@ -65,12 +66,20 @@ if is_torch_available():
             "StableDiffusionLoraLoaderMixin",
             "SD3LoraLoaderMixin",
             "StableDiffusionXLLoraLoaderMixin",
+            "LTXVideoLoraLoaderMixin",
             "LoraLoaderMixin",
             "FluxLoraLoaderMixin",
             "CogVideoXLoraLoaderMixin",
+            "Mochi1LoraLoaderMixin",
+            "HunyuanVideoLoraLoaderMixin",
+            "SanaLoraLoaderMixin",
         ]
         _import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"]
-        _import_structure["ip_adapter"] = ["IPAdapterMixin"]
+        _import_structure["ip_adapter"] = [
+            "IPAdapterMixin",
+            "FluxIPAdapterMixin",
+            "SD3IPAdapterMixin",
+        ]
 
 _import_structure["peft"] = ["PeftAdapterMixin"]
 
@@ -78,16 +87,26 @@ _import_structure["peft"] = ["PeftAdapterMixin"]
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     if is_torch_available():
         from .single_file_model import FromOriginalModelMixin
+        from .transformer_flux import FluxTransformer2DLoadersMixin
+        from .transformer_sd3 import SD3Transformer2DLoadersMixin
         from .unet import UNet2DConditionLoadersMixin
         from .utils import AttnProcsLayers
 
         if is_transformers_available():
-            from .ip_adapter import IPAdapterMixin
+            from .ip_adapter import (
+                FluxIPAdapterMixin,
+                IPAdapterMixin,
+                SD3IPAdapterMixin,
+            )
             from .lora_pipeline import (
                 AmusedLoraLoaderMixin,
                 CogVideoXLoraLoaderMixin,
                 FluxLoraLoaderMixin,
+                HunyuanVideoLoraLoaderMixin,
                 LoraLoaderMixin,
+                LTXVideoLoraLoaderMixin,
+                Mochi1LoraLoaderMixin,
+                SanaLoraLoaderMixin,
                 SD3LoraLoaderMixin,
                 StableDiffusionLoraLoaderMixin,
                 StableDiffusionXLLoraLoaderMixin,
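These loader additions surface through the usual pipeline-level entry points rather than being called directly: for example, Mochi1LoraLoaderMixin is what gives MochiPipeline its load_lora_weights method, and FluxIPAdapterMixin backs load_ip_adapter on the Flux pipelines. A minimal sketch for the Mochi case (the LoRA repo id is a placeholder):

    import torch
    from diffusers import MochiPipeline

    pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", torch_dtype=torch.bfloat16)
    pipe.enable_model_cpu_offload()
    # load_lora_weights is the generic entry point every *LoraLoaderMixin provides.
    pipe.load_lora_weights("your-username/your-mochi-lora")  # placeholder repo id
    frames = pipe("a cat playing piano", num_frames=19).frames[0]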