diffusers-0.31.0-py3-none-any.whl → diffusers-0.32.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +66 -5
- diffusers/callbacks.py +56 -3
- diffusers/configuration_utils.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/image_processor.py +25 -17
- diffusers/loaders/__init__.py +22 -3
- diffusers/loaders/ip_adapter.py +538 -15
- diffusers/loaders/lora_base.py +124 -118
- diffusers/loaders/lora_conversion_utils.py +318 -3
- diffusers/loaders/lora_pipeline.py +1688 -368
- diffusers/loaders/peft.py +379 -0
- diffusers/loaders/single_file_model.py +71 -4
- diffusers/loaders/single_file_utils.py +519 -9
- diffusers/loaders/textual_inversion.py +3 -3
- diffusers/loaders/transformer_flux.py +181 -0
- diffusers/loaders/transformer_sd3.py +89 -0
- diffusers/loaders/unet.py +17 -4
- diffusers/models/__init__.py +47 -14
- diffusers/models/activations.py +22 -9
- diffusers/models/attention.py +13 -4
- diffusers/models/attention_flax.py +1 -1
- diffusers/models/attention_processor.py +2059 -281
- diffusers/models/autoencoders/__init__.py +5 -0
- diffusers/models/autoencoders/autoencoder_dc.py +620 -0
- diffusers/models/autoencoders/autoencoder_kl.py +2 -1
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +36 -27
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
- diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
- diffusers/models/autoencoders/vae.py +18 -5
- diffusers/models/controlnet.py +47 -802
- diffusers/models/controlnet_flux.py +29 -495
- diffusers/models/controlnet_sd3.py +25 -379
- diffusers/models/controlnet_sparsectrl.py +46 -718
- diffusers/models/controlnets/__init__.py +23 -0
- diffusers/models/controlnets/controlnet.py +872 -0
- diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
- diffusers/models/controlnets/controlnet_flux.py +536 -0
- diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
- diffusers/models/controlnets/controlnet_sd3.py +489 -0
- diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
- diffusers/models/controlnets/controlnet_union.py +832 -0
- diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
- diffusers/models/controlnets/multicontrolnet.py +183 -0
- diffusers/models/embeddings.py +838 -43
- diffusers/models/model_loading_utils.py +88 -6
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +74 -28
- diffusers/models/normalization.py +78 -13
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +2 -2
- diffusers/models/transformers/cogvideox_transformer_3d.py +46 -11
- diffusers/models/transformers/dit_transformer_2d.py +1 -1
- diffusers/models/transformers/latte_transformer_3d.py +4 -4
- diffusers/models/transformers/pixart_transformer_2d.py +1 -1
- diffusers/models/transformers/sana_transformer.py +488 -0
- diffusers/models/transformers/stable_audio_transformer.py +1 -1
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +422 -0
- diffusers/models/transformers/transformer_cogview3plus.py +1 -1
- diffusers/models/transformers/transformer_flux.py +30 -9
- diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
- diffusers/models/transformers/transformer_ltx.py +469 -0
- diffusers/models/transformers/transformer_mochi.py +499 -0
- diffusers/models/transformers/transformer_sd3.py +105 -17
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +8 -1
- diffusers/models/unets/unet_2d_blocks.py +88 -21
- diffusers/models/unets/unet_2d_condition.py +1 -1
- diffusers/models/unets/unet_3d_blocks.py +9 -7
- diffusers/models/unets/unet_motion_model.py +5 -5
- diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
- diffusers/models/unets/unet_stable_cascade.py +2 -2
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +8 -0
- diffusers/pipelines/__init__.py +34 -0
- diffusers/pipelines/allegro/__init__.py +48 -0
- diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
- diffusers/pipelines/allegro/pipeline_output.py +23 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +8 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1 -1
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +0 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +8 -8
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -8
- diffusers/pipelines/auto_pipeline.py +53 -6
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +50 -22
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +51 -20
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +69 -21
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +47 -21
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +1 -1
- diffusers/pipelines/controlnet/__init__.py +86 -80
- diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
- diffusers/pipelines/controlnet/pipeline_controlnet.py +11 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +1 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +1 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +1 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +3 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +1 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +5 -1
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +53 -19
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +31 -8
- diffusers/pipelines/flux/__init__.py +13 -1
- diffusers/pipelines/flux/modeling_flux.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +204 -29
- diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +49 -27
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +40 -30
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +78 -56
- diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
- diffusers/pipelines/flux/pipeline_flux_img2img.py +33 -27
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +36 -29
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
- diffusers/pipelines/flux/pipeline_output.py +16 -0
- diffusers/pipelines/hunyuan_video/__init__.py +48 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
- diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +5 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
- diffusers/pipelines/kolors/text_encoder.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/ltx/__init__.py +50 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
- diffusers/pipelines/ltx/pipeline_output.py +20 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +1 -8
- diffusers/pipelines/mochi/__init__.py +48 -0
- diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
- diffusers/pipelines/mochi/pipeline_output.py +20 -0
- diffusers/pipelines/pag/__init__.py +7 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +5 -1
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +6 -13
- diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +3 -0
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
- diffusers/pipelines/pipeline_flax_utils.py +1 -1
- diffusers/pipelines/pipeline_loading_utils.py +25 -4
- diffusers/pipelines/pipeline_utils.py +35 -6
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +6 -13
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +6 -13
- diffusers/pipelines/sana/__init__.py +47 -0
- diffusers/pipelines/sana/pipeline_output.py +21 -0
- diffusers/pipelines/sana/pipeline_sana.py +884 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -3
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +216 -20
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +62 -9
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +57 -8
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -8
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -8
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -8
- diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/quantizers/auto.py +14 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -1
- diffusers/quantizers/gguf/__init__.py +1 -0
- diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
- diffusers/quantizers/gguf/utils.py +456 -0
- diffusers/quantizers/quantization_config.py +280 -2
- diffusers/quantizers/torchao/__init__.py +15 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
- diffusers/schedulers/scheduling_ddpm.py +2 -6
- diffusers/schedulers/scheduling_ddpm_parallel.py +2 -6
- diffusers/schedulers/scheduling_deis_multistep.py +28 -9
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +35 -9
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +35 -8
- diffusers/schedulers/scheduling_dpmsolver_sde.py +4 -4
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +48 -10
- diffusers/schedulers/scheduling_euler_discrete.py +4 -4
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
- diffusers/schedulers/scheduling_heun_discrete.py +4 -4
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +4 -4
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +4 -4
- diffusers/schedulers/scheduling_lcm.py +2 -6
- diffusers/schedulers/scheduling_lms_discrete.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +1 -1
- diffusers/schedulers/scheduling_sasolver.py +28 -9
- diffusers/schedulers/scheduling_tcd.py +2 -6
- diffusers/schedulers/scheduling_unipc_multistep.py +53 -8
- diffusers/training_utils.py +16 -2
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/constants.py +1 -0
- diffusers/utils/dummy_pt_objects.py +180 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dynamic_modules_utils.py +3 -3
- diffusers/utils/hub_utils.py +31 -39
- diffusers/utils/import_utils.py +67 -0
- diffusers/utils/peft_utils.py +3 -0
- diffusers/utils/testing_utils.py +56 -1
- diffusers/utils/torch_utils.py +3 -0
- {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/METADATA +69 -69
- {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/RECORD +214 -162
- {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
- {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
- {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
diffusers/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.31.0"
+__version__ = "0.32.0"
 
 from typing import TYPE_CHECKING
 
@@ -31,7 +31,7 @@ _import_structure = {
     "loaders": ["FromOriginalModelMixin"],
     "models": [],
     "pipelines": [],
-    "quantizers.quantization_config": ["BitsAndBytesConfig"],
+    "quantizers.quantization_config": ["BitsAndBytesConfig", "GGUFQuantizationConfig", "TorchAoConfig"],
     "schedulers": [],
     "utils": [
         "OptionalDependencyNotAvailable",
@@ -77,10 +77,16 @@ except OptionalDependencyNotAvailable:
 else:
     _import_structure["models"].extend(
         [
+            "AllegroTransformer3DModel",
             "AsymmetricAutoencoderKL",
             "AuraFlowTransformer2DModel",
+            "AutoencoderDC",
             "AutoencoderKL",
+            "AutoencoderKLAllegro",
             "AutoencoderKLCogVideoX",
+            "AutoencoderKLHunyuanVideo",
+            "AutoencoderKLLTXVideo",
+            "AutoencoderKLMochi",
             "AutoencoderKLTemporalDecoder",
             "AutoencoderOobleck",
             "AutoencoderTiny",
@@ -88,6 +94,7 @@ else:
             "CogView3PlusTransformer2DModel",
             "ConsistencyDecoderVAE",
             "ControlNetModel",
+            "ControlNetUnionModel",
             "ControlNetXSAdapter",
             "DiTTransformer2DModel",
             "FluxControlNetModel",
@@ -96,15 +103,20 @@ else:
             "HunyuanDiT2DControlNetModel",
             "HunyuanDiT2DModel",
             "HunyuanDiT2DMultiControlNetModel",
+            "HunyuanVideoTransformer3DModel",
             "I2VGenXLUNet",
             "Kandinsky3UNet",
             "LatteTransformer3DModel",
+            "LTXVideoTransformer3DModel",
             "LuminaNextDiT2DModel",
+            "MochiTransformer3DModel",
             "ModelMixin",
             "MotionAdapter",
             "MultiAdapter",
+            "MultiControlNetModel",
             "PixArtTransformer2DModel",
             "PriorTransformer",
+            "SanaTransformer2DModel",
             "SD3ControlNetModel",
             "SD3MultiControlNetModel",
             "SD3Transformer2DModel",
@@ -237,6 +249,7 @@ except OptionalDependencyNotAvailable:
 else:
     _import_structure["pipelines"].extend(
         [
+            "AllegroPipeline",
             "AltDiffusionImg2ImgPipeline",
             "AltDiffusionPipeline",
             "AmusedImg2ImgPipeline",
@@ -263,15 +276,21 @@ else:
             "CogVideoXVideoToVideoPipeline",
             "CogView3PlusPipeline",
             "CycleDiffusionPipeline",
+            "FluxControlImg2ImgPipeline",
+            "FluxControlInpaintPipeline",
             "FluxControlNetImg2ImgPipeline",
             "FluxControlNetInpaintPipeline",
             "FluxControlNetPipeline",
+            "FluxControlPipeline",
+            "FluxFillPipeline",
             "FluxImg2ImgPipeline",
             "FluxInpaintPipeline",
             "FluxPipeline",
+            "FluxPriorReduxPipeline",
             "HunyuanDiTControlNetPipeline",
             "HunyuanDiTPAGPipeline",
             "HunyuanDiTPipeline",
+            "HunyuanVideoPipeline",
             "I2VGenXLPipeline",
             "IFImg2ImgPipeline",
             "IFImg2ImgSuperResolutionPipeline",
@@ -305,15 +324,21 @@ else:
             "LDMTextToImagePipeline",
             "LEditsPPPipelineStableDiffusion",
             "LEditsPPPipelineStableDiffusionXL",
+            "LTXImageToVideoPipeline",
+            "LTXPipeline",
             "LuminaText2ImgPipeline",
             "MarigoldDepthPipeline",
             "MarigoldNormalsPipeline",
+            "MochiPipeline",
             "MusicLDMPipeline",
             "PaintByExamplePipeline",
             "PIAPipeline",
             "PixArtAlphaPipeline",
             "PixArtSigmaPAGPipeline",
             "PixArtSigmaPipeline",
+            "ReduxImageEncoder",
+            "SanaPAGPipeline",
+            "SanaPipeline",
             "SemanticStableDiffusionPipeline",
             "ShapEImg2ImgPipeline",
             "ShapEPipeline",
@@ -326,6 +351,8 @@ else:
             "StableDiffusion3ControlNetPipeline",
             "StableDiffusion3Img2ImgPipeline",
             "StableDiffusion3InpaintPipeline",
+            "StableDiffusion3PAGImg2ImgPipeline",
+            "StableDiffusion3PAGImg2ImgPipeline",
             "StableDiffusion3PAGPipeline",
             "StableDiffusion3Pipeline",
             "StableDiffusionAdapterPipeline",
@@ -349,6 +376,7 @@ else:
             "StableDiffusionLDM3DPipeline",
             "StableDiffusionModelEditingPipeline",
             "StableDiffusionPAGImg2ImgPipeline",
+            "StableDiffusionPAGInpaintPipeline",
             "StableDiffusionPAGPipeline",
             "StableDiffusionPanoramaPipeline",
             "StableDiffusionParadigmsPipeline",
@@ -363,6 +391,9 @@ else:
             "StableDiffusionXLControlNetPAGImg2ImgPipeline",
             "StableDiffusionXLControlNetPAGPipeline",
             "StableDiffusionXLControlNetPipeline",
+            "StableDiffusionXLControlNetUnionImg2ImgPipeline",
+            "StableDiffusionXLControlNetUnionInpaintPipeline",
+            "StableDiffusionXLControlNetUnionPipeline",
             "StableDiffusionXLControlNetXSPipeline",
             "StableDiffusionXLImg2ImgPipeline",
             "StableDiffusionXLInpaintPipeline",
@@ -481,7 +512,7 @@ except OptionalDependencyNotAvailable:
 
 
 else:
-    _import_structure["models.controlnet_flax"] = ["FlaxControlNetModel"]
+    _import_structure["models.controlnets.controlnet_flax"] = ["FlaxControlNetModel"]
     _import_structure["models.modeling_flax_utils"] = ["FlaxModelMixin"]
     _import_structure["models.unets.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
     _import_structure["models.vae_flax"] = ["FlaxAutoencoderKL"]
@@ -539,7 +570,7 @@ else:
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     from .configuration_utils import ConfigMixin
-    from .quantizers.quantization_config import BitsAndBytesConfig
+    from .quantizers.quantization_config import BitsAndBytesConfig, GGUFQuantizationConfig, TorchAoConfig
 
     try:
         if not is_onnx_available():
@@ -556,10 +587,16 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .utils.dummy_pt_objects import *  # noqa F403
     else:
        from .models import (
+            AllegroTransformer3DModel,
             AsymmetricAutoencoderKL,
             AuraFlowTransformer2DModel,
+            AutoencoderDC,
             AutoencoderKL,
+            AutoencoderKLAllegro,
             AutoencoderKLCogVideoX,
+            AutoencoderKLHunyuanVideo,
+            AutoencoderKLLTXVideo,
+            AutoencoderKLMochi,
             AutoencoderKLTemporalDecoder,
             AutoencoderOobleck,
             AutoencoderTiny,
@@ -567,6 +604,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             CogView3PlusTransformer2DModel,
             ConsistencyDecoderVAE,
             ControlNetModel,
+            ControlNetUnionModel,
             ControlNetXSAdapter,
             DiTTransformer2DModel,
             FluxControlNetModel,
@@ -575,15 +613,20 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             HunyuanDiT2DControlNetModel,
             HunyuanDiT2DModel,
             HunyuanDiT2DMultiControlNetModel,
+            HunyuanVideoTransformer3DModel,
             I2VGenXLUNet,
             Kandinsky3UNet,
             LatteTransformer3DModel,
+            LTXVideoTransformer3DModel,
             LuminaNextDiT2DModel,
+            MochiTransformer3DModel,
             ModelMixin,
             MotionAdapter,
             MultiAdapter,
+            MultiControlNetModel,
             PixArtTransformer2DModel,
             PriorTransformer,
+            SanaTransformer2DModel,
             SD3ControlNetModel,
             SD3MultiControlNetModel,
             SD3Transformer2DModel,
@@ -697,6 +740,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .utils.dummy_torch_and_transformers_objects import *  # noqa F403
     else:
         from .pipelines import (
+            AllegroPipeline,
             AltDiffusionImg2ImgPipeline,
             AltDiffusionPipeline,
             AmusedImg2ImgPipeline,
@@ -721,15 +765,21 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             CogVideoXVideoToVideoPipeline,
             CogView3PlusPipeline,
             CycleDiffusionPipeline,
+            FluxControlImg2ImgPipeline,
+            FluxControlInpaintPipeline,
             FluxControlNetImg2ImgPipeline,
             FluxControlNetInpaintPipeline,
             FluxControlNetPipeline,
+            FluxControlPipeline,
+            FluxFillPipeline,
             FluxImg2ImgPipeline,
             FluxInpaintPipeline,
             FluxPipeline,
+            FluxPriorReduxPipeline,
             HunyuanDiTControlNetPipeline,
             HunyuanDiTPAGPipeline,
             HunyuanDiTPipeline,
+            HunyuanVideoPipeline,
             I2VGenXLPipeline,
             IFImg2ImgPipeline,
             IFImg2ImgSuperResolutionPipeline,
@@ -763,15 +813,21 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             LDMTextToImagePipeline,
             LEditsPPPipelineStableDiffusion,
             LEditsPPPipelineStableDiffusionXL,
+            LTXImageToVideoPipeline,
+            LTXPipeline,
             LuminaText2ImgPipeline,
             MarigoldDepthPipeline,
             MarigoldNormalsPipeline,
+            MochiPipeline,
             MusicLDMPipeline,
             PaintByExamplePipeline,
             PIAPipeline,
             PixArtAlphaPipeline,
             PixArtSigmaPAGPipeline,
             PixArtSigmaPipeline,
+            ReduxImageEncoder,
+            SanaPAGPipeline,
+            SanaPipeline,
             SemanticStableDiffusionPipeline,
             ShapEImg2ImgPipeline,
             ShapEPipeline,
@@ -783,6 +839,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusion3ControlNetPipeline,
             StableDiffusion3Img2ImgPipeline,
             StableDiffusion3InpaintPipeline,
+            StableDiffusion3PAGImg2ImgPipeline,
             StableDiffusion3PAGPipeline,
             StableDiffusion3Pipeline,
             StableDiffusionAdapterPipeline,
@@ -806,6 +863,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionLDM3DPipeline,
             StableDiffusionModelEditingPipeline,
             StableDiffusionPAGImg2ImgPipeline,
+            StableDiffusionPAGInpaintPipeline,
             StableDiffusionPAGPipeline,
             StableDiffusionPanoramaPipeline,
             StableDiffusionParadigmsPipeline,
@@ -820,6 +878,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionXLControlNetPAGImg2ImgPipeline,
             StableDiffusionXLControlNetPAGPipeline,
             StableDiffusionXLControlNetPipeline,
+            StableDiffusionXLControlNetUnionImg2ImgPipeline,
+            StableDiffusionXLControlNetUnionInpaintPipeline,
+            StableDiffusionXLControlNetUnionPipeline,
             StableDiffusionXLControlNetXSPipeline,
             StableDiffusionXLImg2ImgPipeline,
             StableDiffusionXLInpaintPipeline,
@@ -902,7 +963,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from .utils.dummy_flax_objects import *  # noqa F403
     else:
-        from .models.controlnet_flax import FlaxControlNetModel
+        from .models.controlnets.controlnet_flax import FlaxControlNetModel
         from .models.modeling_flax_utils import FlaxModelMixin
         from .models.unets.unet_2d_condition_flax import FlaxUNet2DConditionModel
         from .models.vae_flax import FlaxAutoencoderKL
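The top-level changes summarize the release: new video models (Allegro, HunyuanVideo, LTX, Mochi), new pipelines (Flux Control/Fill/Redux, Sana, ControlNet-Union), the `models.controlnet_*` → `models.controlnets.*` move, and two quantization configs exported next to `BitsAndBytesConfig`. A minimal sketch of the new GGUF loading path, following the pattern in the 0.32 documentation; the checkpoint repo and file name are illustrative assumptions, not taken from this diff:

```python
import torch

from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig

# Assumption: a community GGUF quantization of the FLUX.1-dev transformer.
ckpt_path = "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf"

transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()  # keep VRAM low; weights stay quantized at rest
image = pipe("A cat holding a sign that says hello world").images[0]
```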
diffusers/callbacks.py
CHANGED
@@ -97,13 +97,17 @@ class SDCFGCutoffCallback(PipelineCallback):
 
 class SDXLCFGCutoffCallback(PipelineCallback):
     """
-    Callback function for Stable Diffusion XL Pipelines. After certain number of steps (set by
-    `cutoff_step_index`), this callback will disable the CFG.
+    Callback function for the base Stable Diffusion XL Pipelines. After certain number of steps (set by
+    `cutoff_step_ratio` or `cutoff_step_index`), this callback will disable the CFG.
 
     Note: This callback mutates the pipeline by changing the `_guidance_scale` attribute to 0.0 after the cutoff step.
     """
 
-    tensor_inputs = ["prompt_embeds", "add_text_embeds", "add_time_ids"]
+    tensor_inputs = [
+        "prompt_embeds",
+        "add_text_embeds",
+        "add_time_ids",
+    ]
 
     def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
         cutoff_step_ratio = self.config.cutoff_step_ratio
@@ -129,6 +133,55 @@ class SDXLCFGCutoffCallback(PipelineCallback):
         callback_kwargs[self.tensor_inputs[0]] = prompt_embeds
         callback_kwargs[self.tensor_inputs[1]] = add_text_embeds
         callback_kwargs[self.tensor_inputs[2]] = add_time_ids
+
+        return callback_kwargs
+
+
+class SDXLControlnetCFGCutoffCallback(PipelineCallback):
+    """
+    Callback function for the Controlnet Stable Diffusion XL Pipelines. After certain number of steps (set by
+    `cutoff_step_ratio` or `cutoff_step_index`), this callback will disable the CFG.
+
+    Note: This callback mutates the pipeline by changing the `_guidance_scale` attribute to 0.0 after the cutoff step.
+    """
+
+    tensor_inputs = [
+        "prompt_embeds",
+        "add_text_embeds",
+        "add_time_ids",
+        "image",
+    ]
+
+    def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
+        cutoff_step_ratio = self.config.cutoff_step_ratio
+        cutoff_step_index = self.config.cutoff_step_index
+
+        # Use cutoff_step_index if it's not None, otherwise use cutoff_step_ratio
+        cutoff_step = (
+            cutoff_step_index if cutoff_step_index is not None else int(pipeline.num_timesteps * cutoff_step_ratio)
+        )
+
+        if step_index == cutoff_step:
+            prompt_embeds = callback_kwargs[self.tensor_inputs[0]]
+            prompt_embeds = prompt_embeds[-1:]  # "-1" denotes the embeddings for conditional text tokens.
+
+            add_text_embeds = callback_kwargs[self.tensor_inputs[1]]
+            add_text_embeds = add_text_embeds[-1:]  # "-1" denotes the embeddings for conditional pooled text tokens
+
+            add_time_ids = callback_kwargs[self.tensor_inputs[2]]
+            add_time_ids = add_time_ids[-1:]  # "-1" denotes the embeddings for conditional added time vector
+
+            # For Controlnet
+            image = callback_kwargs[self.tensor_inputs[3]]
+            image = image[-1:]
+
+            pipeline._guidance_scale = 0.0
+
+            callback_kwargs[self.tensor_inputs[0]] = prompt_embeds
+            callback_kwargs[self.tensor_inputs[1]] = add_text_embeds
+            callback_kwargs[self.tensor_inputs[2]] = add_time_ids
+            callback_kwargs[self.tensor_inputs[3]] = image
+
         return callback_kwargs
 
 
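The new `SDXLControlnetCFGCutoffCallback` follows the same pattern as `SDXLCFGCutoffCallback` but additionally slices the ControlNet `image` conditioning tensor when guidance is switched off. A usage sketch; the model IDs and the precomputed `canny_image` are assumptions for illustration:

```python
import torch
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline
from diffusers.callbacks import SDXLControlnetCFGCutoffCallback

# Stop classifier-free guidance after 40% of the denoising steps.
callback = SDXLControlnetCFGCutoffCallback(cutoff_step_ratio=0.4)

controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")

image = pipe(
    prompt="aerial view, a futuristic research complex",
    image=canny_image,  # assumption: a precomputed Canny edge map (PIL image)
    callback_on_step_end=callback,
    # The callback must receive every tensor it slices, including "image".
    callback_on_step_end_tensor_inputs=callback.tensor_inputs,
).images[0]
```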
diffusers/configuration_utils.py
CHANGED
@@ -170,7 +170,7 @@ class ConfigMixin:
 
         if push_to_hub:
             commit_message = kwargs.pop("commit_message", None)
-            private = kwargs.pop("private", False)
+            private = kwargs.pop("private", None)
             create_pr = kwargs.pop("create_pr", False)
             token = kwargs.pop("token", None)
             repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
diffusers/dependency_versions_table.py
CHANGED
@@ -38,7 +38,7 @@ deps = {
     "regex": "regex!=2019.12.17",
     "requests": "requests",
     "tensorboard": "tensorboard",
-    "torch": "torch>=1.4,<2.5.0",
+    "torch": "torch>=1.4",
     "torchvision": "torchvision",
     "transformers": "transformers>=4.41.2",
     "urllib3": "urllib3<=2.0.0",
diffusers/image_processor.py
CHANGED
@@ -236,7 +236,7 @@ class VaeImageProcessor(ConfigMixin):
             `np.ndarray` or `torch.Tensor`:
                 The denormalized image array.
         """
-        return (images / 2 + 0.5).clamp(0, 1)
+        return (images * 0.5 + 0.5).clamp(0, 1)
 
     @staticmethod
     def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
@@ -537,6 +537,26 @@ class VaeImageProcessor(ConfigMixin):
 
         return image
 
+    def _denormalize_conditionally(
+        self, images: torch.Tensor, do_denormalize: Optional[List[bool]] = None
+    ) -> torch.Tensor:
+        r"""
+        Denormalize a batch of images based on a condition list.
+
+        Args:
+            images (`torch.Tensor`):
+                The input image tensor.
+            do_denormalize (`Optional[List[bool]`, *optional*, defaults to `None`):
+                A list of booleans indicating whether to denormalize each image in the batch. If `None`, will use the
+                value of `do_normalize` in the `VaeImageProcessor` config.
+        """
+        if do_denormalize is None:
+            return self.denormalize(images) if self.config.do_normalize else images
+
+        return torch.stack(
+            [self.denormalize(images[i]) if do_denormalize[i] else images[i] for i in range(images.shape[0])]
+        )
+
     def get_default_height_width(
         self,
         image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
@@ -752,12 +772,7 @@ class VaeImageProcessor(ConfigMixin):
         if output_type == "latent":
             return image
 
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
-        )
+        image = self._denormalize_conditionally(image, do_denormalize)
 
         if output_type == "pt":
             return image
@@ -795,13 +810,11 @@ class VaeImageProcessor(ConfigMixin):
             The final image with the overlay applied.
         """
 
-        width, height = image.width, image.height
-
-        init_image = self.resize(init_image, width=width, height=height)
-        mask = self.resize(mask, width=width, height=height)
+        width, height = init_image.width, init_image.height
 
         init_image_masked = PIL.Image.new("RGBa", (width, height))
         init_image_masked.paste(init_image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(mask.convert("L")))
+
         init_image_masked = init_image_masked.convert("RGBA")
 
         if crop_coords is not None:
@@ -968,12 +981,7 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
             deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False)
             output_type = "np"
 
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
-        )
+        image = self._denormalize_conditionally(image, do_denormalize)
 
         image = self.pt_to_numpy(image)
 
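Both `postprocess` implementations now delegate their duplicated denormalization blocks to the new `_denormalize_conditionally` helper, and `denormalize` swaps `images / 2 + 0.5` for the arithmetically identical `images * 0.5 + 0.5`. A self-contained sketch of the consolidated behavior:

```python
import torch

def denormalize(images: torch.Tensor) -> torch.Tensor:
    # Map [-1, 1] -> [0, 1]; "* 0.5" is exactly "/ 2", written as a multiply.
    return (images * 0.5 + 0.5).clamp(0, 1)

# Mirror of _denormalize_conditionally with an explicit flag list:
# only images whose flag is True are denormalized.
images = torch.rand(4, 3, 8, 8) * 2 - 1  # a batch in [-1, 1]
do_denormalize = [True, False, True, True]

out = torch.stack(
    [denormalize(images[i]) if do_denormalize[i] else images[i] for i in range(images.shape[0])]
)
assert out.shape == images.shape
assert out[0].min() >= 0 and torch.equal(out[1], images[1])
```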
diffusers/loaders/__init__.py
CHANGED
@@ -55,7 +55,8 @@ _import_structure = {}
 
 if is_torch_available():
     _import_structure["single_file_model"] = ["FromOriginalModelMixin"]
-
+    _import_structure["transformer_flux"] = ["FluxTransformer2DLoadersMixin"]
+    _import_structure["transformer_sd3"] = ["SD3Transformer2DLoadersMixin"]
     _import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
     _import_structure["utils"] = ["AttnProcsLayers"]
     if is_transformers_available():
@@ -65,12 +66,20 @@ if is_torch_available():
             "StableDiffusionLoraLoaderMixin",
             "SD3LoraLoaderMixin",
             "StableDiffusionXLLoraLoaderMixin",
+            "LTXVideoLoraLoaderMixin",
             "LoraLoaderMixin",
             "FluxLoraLoaderMixin",
             "CogVideoXLoraLoaderMixin",
+            "Mochi1LoraLoaderMixin",
+            "HunyuanVideoLoraLoaderMixin",
+            "SanaLoraLoaderMixin",
         ]
         _import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"]
-        _import_structure["ip_adapter"] = ["IPAdapterMixin"]
+        _import_structure["ip_adapter"] = [
+            "IPAdapterMixin",
+            "FluxIPAdapterMixin",
+            "SD3IPAdapterMixin",
+        ]
 
 _import_structure["peft"] = ["PeftAdapterMixin"]
 
@@ -78,16 +87,26 @@ _import_structure["peft"] = ["PeftAdapterMixin"]
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     if is_torch_available():
         from .single_file_model import FromOriginalModelMixin
+        from .transformer_flux import FluxTransformer2DLoadersMixin
+        from .transformer_sd3 import SD3Transformer2DLoadersMixin
         from .unet import UNet2DConditionLoadersMixin
         from .utils import AttnProcsLayers
 
         if is_transformers_available():
-            from .ip_adapter import IPAdapterMixin
+            from .ip_adapter import (
+                FluxIPAdapterMixin,
+                IPAdapterMixin,
+                SD3IPAdapterMixin,
+            )
             from .lora_pipeline import (
                 AmusedLoraLoaderMixin,
                 CogVideoXLoraLoaderMixin,
                 FluxLoraLoaderMixin,
+                HunyuanVideoLoraLoaderMixin,
                 LoraLoaderMixin,
+                LTXVideoLoraLoaderMixin,
+                Mochi1LoraLoaderMixin,
+                SanaLoraLoaderMixin,
                 SD3LoraLoaderMixin,
                 StableDiffusionLoraLoaderMixin,
                 StableDiffusionXLLoraLoaderMixin,
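Every loader added here is registered twice, matching the file's existing pattern: once in `_import_structure` for lazy runtime resolution, and once under `TYPE_CHECKING` so static analyzers see real imports. A simplified stand-in for the lazy half; diffusers actually routes this through its `_LazyModule` helper, whereas this sketch uses PEP 562 module-level `__getattr__`:

```python
# lazy_package/__init__.py — illustrative stand-in, not the diffusers implementation
import importlib

_import_structure = {
    "ip_adapter": ["IPAdapterMixin", "FluxIPAdapterMixin", "SD3IPAdapterMixin"],
    "transformer_flux": ["FluxTransformer2DLoadersMixin"],
    "transformer_sd3": ["SD3Transformer2DLoadersMixin"],
}

# Invert the structure so each public name maps to its submodule.
_attr_to_module = {attr: mod for mod, attrs in _import_structure.items() for attr in attrs}

def __getattr__(name: str):
    # Invoked only for names not already bound; imports the submodule on first access.
    module_name = _attr_to_module.get(name)
    if module_name is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    module = importlib.import_module(f".{module_name}", __name__)
    return getattr(module, name)
```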