diffusers 0.29.2__py3-none-any.whl → 0.30.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +94 -3
- diffusers/commands/env.py +1 -5
- diffusers/configuration_utils.py +4 -9
- diffusers/dependency_versions_table.py +2 -2
- diffusers/image_processor.py +1 -2
- diffusers/loaders/__init__.py +17 -2
- diffusers/loaders/ip_adapter.py +10 -7
- diffusers/loaders/lora_base.py +752 -0
- diffusers/loaders/lora_pipeline.py +2252 -0
- diffusers/loaders/peft.py +213 -5
- diffusers/loaders/single_file.py +3 -14
- diffusers/loaders/single_file_model.py +31 -10
- diffusers/loaders/single_file_utils.py +293 -8
- diffusers/loaders/textual_inversion.py +1 -6
- diffusers/loaders/unet.py +23 -208
- diffusers/models/__init__.py +20 -0
- diffusers/models/activations.py +22 -0
- diffusers/models/attention.py +386 -7
- diffusers/models/attention_processor.py +1937 -629
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_kl.py +14 -3
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1271 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +1 -1
- diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +1 -0
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vq_model.py +4 -4
- diffusers/models/controlnet.py +2 -3
- diffusers/models/controlnet_hunyuan.py +401 -0
- diffusers/models/controlnet_sd3.py +11 -11
- diffusers/models/controlnet_sparsectrl.py +789 -0
- diffusers/models/controlnet_xs.py +40 -10
- diffusers/models/downsampling.py +68 -0
- diffusers/models/embeddings.py +403 -36
- diffusers/models/model_loading_utils.py +1 -3
- diffusers/models/modeling_flax_utils.py +1 -6
- diffusers/models/modeling_utils.py +4 -16
- diffusers/models/normalization.py +203 -12
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +543 -0
- diffusers/models/transformers/cogvideox_transformer_3d.py +485 -0
- diffusers/models/transformers/hunyuan_transformer_2d.py +19 -15
- diffusers/models/transformers/latte_transformer_3d.py +327 -0
- diffusers/models/transformers/lumina_nextdit2d.py +340 -0
- diffusers/models/transformers/pixart_transformer_2d.py +102 -1
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/stable_audio_transformer.py +458 -0
- diffusers/models/transformers/transformer_flux.py +455 -0
- diffusers/models/transformers/transformer_sd3.py +18 -4
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_condition.py +8 -1
- diffusers/models/unets/unet_3d_blocks.py +51 -920
- diffusers/models/unets/unet_3d_condition.py +4 -1
- diffusers/models/unets/unet_i2vgen_xl.py +4 -1
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +1330 -84
- diffusers/models/unets/unet_spatio_temporal_condition.py +1 -1
- diffusers/models/unets/unet_stable_cascade.py +1 -3
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +64 -0
- diffusers/models/vq_model.py +8 -4
- diffusers/optimization.py +1 -1
- diffusers/pipelines/__init__.py +100 -3
- diffusers/pipelines/animatediff/__init__.py +4 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +50 -40
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1076 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +17 -27
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1008 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +51 -38
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +1 -0
- diffusers/pipelines/aura_flow/__init__.py +48 -0
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +591 -0
- diffusers/pipelines/auto_pipeline.py +97 -19
- diffusers/pipelines/cogvideo/__init__.py +48 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +746 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +24 -30
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +31 -30
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +24 -153
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +19 -28
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -28
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +29 -32
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
- diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1042 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +35 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +10 -6
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +0 -4
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +2 -2
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -6
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +6 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +3 -3
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
- diffusers/pipelines/flux/__init__.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +749 -0
- diffusers/pipelines/flux/pipeline_output.py +21 -0
- diffusers/pipelines/free_init_utils.py +2 -0
- diffusers/pipelines/free_noise_utils.py +236 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +2 -2
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +2 -2
- diffusers/pipelines/kolors/__init__.py +54 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1247 -0
- diffusers/pipelines/kolors/pipeline_output.py +21 -0
- diffusers/pipelines/kolors/text_encoder.py +889 -0
- diffusers/pipelines/kolors/tokenizer.py +334 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +30 -29
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +23 -29
- diffusers/pipelines/latte/__init__.py +48 -0
- diffusers/pipelines/latte/pipeline_latte.py +881 -0
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +4 -4
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +0 -4
- diffusers/pipelines/lumina/__init__.py +48 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +897 -0
- diffusers/pipelines/pag/__init__.py +67 -0
- diffusers/pipelines/pag/pag_utils.py +237 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1329 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1612 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +953 -0
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +872 -0
- diffusers/pipelines/pag/pipeline_pag_sd.py +1050 -0
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +985 -0
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +862 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1333 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1529 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1753 -0
- diffusers/pipelines/pia/pipeline_pia.py +30 -37
- diffusers/pipelines/pipeline_flax_utils.py +4 -9
- diffusers/pipelines/pipeline_loading_utils.py +0 -3
- diffusers/pipelines/pipeline_utils.py +2 -14
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +0 -1
- diffusers/pipelines/stable_audio/__init__.py +50 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +745 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +2 -0
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +23 -29
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +15 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +30 -29
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +23 -152
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +8 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +8 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +6 -6
- diffusers/pipelines/stable_diffusion_3/__init__.py +2 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +34 -3
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +33 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1201 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +3 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +6 -6
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +5 -5
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +5 -5
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +6 -6
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +0 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +23 -29
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +27 -29
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +3 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +17 -27
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -29
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +17 -145
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +0 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +6 -6
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -28
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +8 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +8 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +6 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +0 -4
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -4
- diffusers/schedulers/__init__.py +8 -0
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
- diffusers/schedulers/scheduling_ddim.py +1 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +449 -0
- diffusers/schedulers/scheduling_ddpm.py +1 -1
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -1
- diffusers/schedulers/scheduling_deis_multistep.py +2 -2
- diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +1 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +1 -1
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +64 -19
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -2
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +63 -39
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +321 -0
- diffusers/schedulers/scheduling_ipndm.py +1 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +1 -1
- diffusers/schedulers/scheduling_utils.py +1 -3
- diffusers/schedulers/scheduling_utils_flax.py +1 -3
- diffusers/training_utils.py +99 -14
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +210 -0
- diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +315 -0
- diffusers/utils/dynamic_modules_utils.py +1 -11
- diffusers/utils/export_utils.py +50 -6
- diffusers/utils/hub_utils.py +45 -42
- diffusers/utils/import_utils.py +37 -15
- diffusers/utils/loading_utils.py +80 -3
- diffusers/utils/testing_utils.py +11 -8
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/METADATA +73 -83
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/RECORD +217 -164
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/WHEEL +1 -1
- diffusers/loaders/autoencoder.py +0 -146
- diffusers/loaders/controlnet.py +0 -136
- diffusers/loaders/lora.py +0 -1728
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/LICENSE +0 -0
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/top_level.txt +0 -0
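Version 0.30 adds a family of Perturbed-Attention Guidance (PAG) pipelines (the new `diffusers/pipelines/pag/` modules listed above). Below is a minimal sketch of how one of those exports might be exercised; the checkpoint id, prompt, and `pag_scale` value are illustrative assumptions, not values taken from this diff.

```python
# Hedged sketch: exercises the StableDiffusionXLPAGPipeline export added in 0.30.x.
# The model id and the generation arguments are placeholders chosen for illustration.
import torch

from diffusers import StableDiffusionXLPAGPipeline

pipe = StableDiffusionXLPAGPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",  # assumed base checkpoint
    torch_dtype=torch.float16,
).to("cuda")

# pag_scale controls the strength of perturbed-attention guidance.
image = pipe(
    "an astronaut riding a horse on the moon",
    pag_scale=3.0,
    guidance_scale=7.0,
).images[0]
image.save("pag_example.png")
```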
diffusers/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.29.2"
+__version__ = "0.30.1"
 
 from typing import TYPE_CHECKING
 
@@ -12,6 +12,7 @@ from .utils import (
     is_note_seq_available,
     is_onnx_available,
     is_scipy_available,
+    is_sentencepiece_available,
     is_torch_available,
     is_torchsde_available,
     is_transformers_available,
@@ -76,16 +77,25 @@ else:
     _import_structure["models"].extend(
         [
             "AsymmetricAutoencoderKL",
+            "AuraFlowTransformer2DModel",
             "AutoencoderKL",
+            "AutoencoderKLCogVideoX",
             "AutoencoderKLTemporalDecoder",
+            "AutoencoderOobleck",
             "AutoencoderTiny",
+            "CogVideoXTransformer3DModel",
             "ConsistencyDecoderVAE",
             "ControlNetModel",
             "ControlNetXSAdapter",
             "DiTTransformer2DModel",
+            "FluxTransformer2DModel",
+            "HunyuanDiT2DControlNetModel",
             "HunyuanDiT2DModel",
+            "HunyuanDiT2DMultiControlNetModel",
             "I2VGenXLUNet",
             "Kandinsky3UNet",
+            "LatteTransformer3DModel",
+            "LuminaNextDiT2DModel",
             "ModelMixin",
             "MotionAdapter",
             "MultiAdapter",
@@ -94,6 +104,8 @@ else:
             "SD3ControlNetModel",
             "SD3MultiControlNetModel",
             "SD3Transformer2DModel",
+            "SparseControlNetModel",
+            "StableAudioDiTModel",
             "StableCascadeUNet",
             "T2IAdapter",
             "T5FilmDecoder",
@@ -145,6 +157,8 @@ else:
         [
             "AmusedScheduler",
             "CMStochasticIterativeScheduler",
+            "CogVideoXDDIMScheduler",
+            "CogVideoXDPMScheduler",
             "DDIMInverseScheduler",
             "DDIMParallelScheduler",
             "DDIMScheduler",
@@ -160,6 +174,7 @@ else:
             "EulerAncestralDiscreteScheduler",
             "EulerDiscreteScheduler",
             "FlowMatchEulerDiscreteScheduler",
+            "FlowMatchHeunDiscreteScheduler",
             "HeunDiscreteScheduler",
             "IPNDMScheduler",
             "KarrasVeScheduler",
@@ -203,7 +218,7 @@ except OptionalDependencyNotAvailable:
     ]
 
 else:
-    _import_structure["schedulers"].extend(["DPMSolverSDEScheduler"])
+    _import_structure["schedulers"].extend(["CosineDPMSolverMultistepScheduler", "DPMSolverSDEScheduler"])
 
 try:
     if not (is_torch_available() and is_transformers_available()):
@@ -223,17 +238,25 @@ else:
             "AmusedImg2ImgPipeline",
             "AmusedInpaintPipeline",
             "AmusedPipeline",
+            "AnimateDiffControlNetPipeline",
+            "AnimateDiffPAGPipeline",
             "AnimateDiffPipeline",
             "AnimateDiffSDXLPipeline",
+            "AnimateDiffSparseControlNetPipeline",
             "AnimateDiffVideoToVideoPipeline",
             "AudioLDM2Pipeline",
             "AudioLDM2ProjectionModel",
             "AudioLDM2UNet2DConditionModel",
             "AudioLDMPipeline",
+            "AuraFlowPipeline",
             "BlipDiffusionControlNetPipeline",
             "BlipDiffusionPipeline",
             "CLIPImageProjection",
+            "CogVideoXPipeline",
             "CycleDiffusionPipeline",
+            "FluxPipeline",
+            "HunyuanDiTControlNetPipeline",
+            "HunyuanDiTPAGPipeline",
             "HunyuanDiTPipeline",
             "I2VGenXLPipeline",
             "IFImg2ImgPipeline",
@@ -264,29 +287,37 @@ else:
             "KandinskyV22PriorPipeline",
             "LatentConsistencyModelImg2ImgPipeline",
             "LatentConsistencyModelPipeline",
+            "LattePipeline",
             "LDMTextToImagePipeline",
             "LEditsPPPipelineStableDiffusion",
             "LEditsPPPipelineStableDiffusionXL",
+            "LuminaText2ImgPipeline",
             "MarigoldDepthPipeline",
             "MarigoldNormalsPipeline",
             "MusicLDMPipeline",
             "PaintByExamplePipeline",
             "PIAPipeline",
             "PixArtAlphaPipeline",
+            "PixArtSigmaPAGPipeline",
             "PixArtSigmaPipeline",
             "SemanticStableDiffusionPipeline",
             "ShapEImg2ImgPipeline",
             "ShapEPipeline",
+            "StableAudioPipeline",
+            "StableAudioProjectionModel",
             "StableCascadeCombinedPipeline",
             "StableCascadeDecoderPipeline",
             "StableCascadePriorPipeline",
             "StableDiffusion3ControlNetPipeline",
             "StableDiffusion3Img2ImgPipeline",
+            "StableDiffusion3InpaintPipeline",
+            "StableDiffusion3PAGPipeline",
             "StableDiffusion3Pipeline",
             "StableDiffusionAdapterPipeline",
             "StableDiffusionAttendAndExcitePipeline",
             "StableDiffusionControlNetImg2ImgPipeline",
             "StableDiffusionControlNetInpaintPipeline",
+            "StableDiffusionControlNetPAGPipeline",
             "StableDiffusionControlNetPipeline",
             "StableDiffusionControlNetXSPipeline",
             "StableDiffusionDepth2ImgPipeline",
@@ -301,6 +332,7 @@ else:
             "StableDiffusionLatentUpscalePipeline",
             "StableDiffusionLDM3DPipeline",
             "StableDiffusionModelEditingPipeline",
+            "StableDiffusionPAGPipeline",
             "StableDiffusionPanoramaPipeline",
             "StableDiffusionParadigmsPipeline",
             "StableDiffusionPipeline",
@@ -311,11 +343,15 @@ else:
             "StableDiffusionXLAdapterPipeline",
             "StableDiffusionXLControlNetImg2ImgPipeline",
             "StableDiffusionXLControlNetInpaintPipeline",
+            "StableDiffusionXLControlNetPAGPipeline",
             "StableDiffusionXLControlNetPipeline",
             "StableDiffusionXLControlNetXSPipeline",
             "StableDiffusionXLImg2ImgPipeline",
             "StableDiffusionXLInpaintPipeline",
             "StableDiffusionXLInstructPix2PixPipeline",
+            "StableDiffusionXLPAGImg2ImgPipeline",
+            "StableDiffusionXLPAGInpaintPipeline",
+            "StableDiffusionXLPAGPipeline",
             "StableDiffusionXLPipeline",
             "StableUnCLIPImg2ImgPipeline",
             "StableUnCLIPPipeline",
@@ -353,6 +389,19 @@ except OptionalDependencyNotAvailable:
 else:
     _import_structure["pipelines"].extend(["StableDiffusionKDiffusionPipeline", "StableDiffusionXLKDiffusionPipeline"])
 
+try:
+    if not (is_torch_available() and is_transformers_available() and is_sentencepiece_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from .utils import dummy_torch_and_transformers_and_sentencepiece_objects  # noqa F403
+
+    _import_structure["utils.dummy_torch_and_transformers_and_sentencepiece_objects"] = [
+        name for name in dir(dummy_torch_and_transformers_and_sentencepiece_objects) if not name.startswith("_")
+    ]
+
+else:
+    _import_structure["pipelines"].extend(["KolorsImg2ImgPipeline", "KolorsPAGPipeline", "KolorsPipeline"])
+
 try:
     if not (is_torch_available() and is_transformers_available() and is_onnx_available()):
         raise OptionalDependencyNotAvailable()
@@ -489,16 +538,25 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     else:
         from .models import (
            AsymmetricAutoencoderKL,
+            AuraFlowTransformer2DModel,
            AutoencoderKL,
+            AutoencoderKLCogVideoX,
            AutoencoderKLTemporalDecoder,
+            AutoencoderOobleck,
            AutoencoderTiny,
+            CogVideoXTransformer3DModel,
            ConsistencyDecoderVAE,
            ControlNetModel,
            ControlNetXSAdapter,
            DiTTransformer2DModel,
+            FluxTransformer2DModel,
+            HunyuanDiT2DControlNetModel,
            HunyuanDiT2DModel,
+            HunyuanDiT2DMultiControlNetModel,
            I2VGenXLUNet,
            Kandinsky3UNet,
+            LatteTransformer3DModel,
+            LuminaNextDiT2DModel,
            ModelMixin,
            MotionAdapter,
            MultiAdapter,
@@ -507,6 +565,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            SD3ControlNetModel,
            SD3MultiControlNetModel,
            SD3Transformer2DModel,
+            SparseControlNetModel,
+            StableAudioDiTModel,
            T2IAdapter,
            T5FilmDecoder,
            Transformer2DModel,
@@ -555,6 +615,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
        from .schedulers import (
            AmusedScheduler,
            CMStochasticIterativeScheduler,
+            CogVideoXDDIMScheduler,
+            CogVideoXDPMScheduler,
            DDIMInverseScheduler,
            DDIMParallelScheduler,
            DDIMScheduler,
@@ -570,6 +632,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            EulerAncestralDiscreteScheduler,
            EulerDiscreteScheduler,
            FlowMatchEulerDiscreteScheduler,
+            FlowMatchHeunDiscreteScheduler,
            HeunDiscreteScheduler,
            IPNDMScheduler,
            KarrasVeScheduler,
@@ -602,7 +665,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
    except OptionalDependencyNotAvailable:
        from .utils.dummy_torch_and_torchsde_objects import *  # noqa F403
    else:
-        from .schedulers import DPMSolverSDEScheduler
+        from .schedulers import CosineDPMSolverMultistepScheduler, DPMSolverSDEScheduler
 
    try:
        if not (is_torch_available() and is_transformers_available()):
@@ -616,15 +679,23 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            AmusedImg2ImgPipeline,
            AmusedInpaintPipeline,
            AmusedPipeline,
+            AnimateDiffControlNetPipeline,
+            AnimateDiffPAGPipeline,
            AnimateDiffPipeline,
            AnimateDiffSDXLPipeline,
+            AnimateDiffSparseControlNetPipeline,
            AnimateDiffVideoToVideoPipeline,
            AudioLDM2Pipeline,
            AudioLDM2ProjectionModel,
            AudioLDM2UNet2DConditionModel,
            AudioLDMPipeline,
+            AuraFlowPipeline,
            CLIPImageProjection,
+            CogVideoXPipeline,
            CycleDiffusionPipeline,
+            FluxPipeline,
+            HunyuanDiTControlNetPipeline,
+            HunyuanDiTPAGPipeline,
            HunyuanDiTPipeline,
            I2VGenXLPipeline,
            IFImg2ImgPipeline,
@@ -655,29 +726,37 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            KandinskyV22PriorPipeline,
            LatentConsistencyModelImg2ImgPipeline,
            LatentConsistencyModelPipeline,
+            LattePipeline,
            LDMTextToImagePipeline,
            LEditsPPPipelineStableDiffusion,
            LEditsPPPipelineStableDiffusionXL,
+            LuminaText2ImgPipeline,
            MarigoldDepthPipeline,
            MarigoldNormalsPipeline,
            MusicLDMPipeline,
            PaintByExamplePipeline,
            PIAPipeline,
            PixArtAlphaPipeline,
+            PixArtSigmaPAGPipeline,
            PixArtSigmaPipeline,
            SemanticStableDiffusionPipeline,
            ShapEImg2ImgPipeline,
            ShapEPipeline,
+            StableAudioPipeline,
+            StableAudioProjectionModel,
            StableCascadeCombinedPipeline,
            StableCascadeDecoderPipeline,
            StableCascadePriorPipeline,
            StableDiffusion3ControlNetPipeline,
            StableDiffusion3Img2ImgPipeline,
+            StableDiffusion3InpaintPipeline,
+            StableDiffusion3PAGPipeline,
            StableDiffusion3Pipeline,
            StableDiffusionAdapterPipeline,
            StableDiffusionAttendAndExcitePipeline,
            StableDiffusionControlNetImg2ImgPipeline,
            StableDiffusionControlNetInpaintPipeline,
+            StableDiffusionControlNetPAGPipeline,
            StableDiffusionControlNetPipeline,
            StableDiffusionControlNetXSPipeline,
            StableDiffusionDepth2ImgPipeline,
@@ -692,6 +771,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            StableDiffusionLatentUpscalePipeline,
            StableDiffusionLDM3DPipeline,
            StableDiffusionModelEditingPipeline,
+            StableDiffusionPAGPipeline,
            StableDiffusionPanoramaPipeline,
            StableDiffusionParadigmsPipeline,
            StableDiffusionPipeline,
@@ -702,11 +782,15 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            StableDiffusionXLAdapterPipeline,
            StableDiffusionXLControlNetImg2ImgPipeline,
            StableDiffusionXLControlNetInpaintPipeline,
+            StableDiffusionXLControlNetPAGPipeline,
            StableDiffusionXLControlNetPipeline,
            StableDiffusionXLControlNetXSPipeline,
            StableDiffusionXLImg2ImgPipeline,
            StableDiffusionXLInpaintPipeline,
            StableDiffusionXLInstructPix2PixPipeline,
+            StableDiffusionXLPAGImg2ImgPipeline,
+            StableDiffusionXLPAGInpaintPipeline,
+            StableDiffusionXLPAGPipeline,
            StableDiffusionXLPipeline,
            StableUnCLIPImg2ImgPipeline,
            StableUnCLIPPipeline,
@@ -738,6 +822,13 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
    else:
        from .pipelines import StableDiffusionKDiffusionPipeline, StableDiffusionXLKDiffusionPipeline
 
+    try:
+        if not (is_torch_available() and is_transformers_available() and is_sentencepiece_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from .utils.dummy_torch_and_transformers_and_sentencepiece_objects import *  # noqa F403
+    else:
+        from .pipelines import KolorsImg2ImgPipeline, KolorsPAGPipeline, KolorsPipeline
    try:
        if not (is_torch_available() and is_transformers_available() and is_onnx_available()):
            raise OptionalDependencyNotAvailable()
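The names newly re-exported above include `FluxPipeline`, `CogVideoXPipeline`, the Kolors and Stable Audio pipelines, and the PAG variants. A hedged sketch of driving one of them through the usual `from_pretrained` entry point follows; the repository id, offload call, and step count are assumptions chosen for illustration rather than values from this diff.

```python
# Hedged sketch: uses the FluxPipeline export added in this release.
# "black-forest-labs/FLUX.1-schnell" is an assumed checkpoint id for illustration.
import torch

from diffusers import FluxPipeline

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()  # keeps VRAM usage manageable on smaller GPUs

image = pipe(
    "a cat holding a sign that says hello world",
    num_inference_steps=4,  # the distilled "schnell" variant needs only a few steps
    guidance_scale=0.0,
).images[0]
image.save("flux_example.png")
```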
diffusers/commands/env.py
CHANGED
@@ -24,7 +24,6 @@ from ..utils import (
     is_bitsandbytes_available,
     is_flax_available,
     is_google_colab,
-    is_notebook,
     is_peft_available,
     is_safetensors_available,
     is_torch_available,
@@ -107,8 +106,6 @@ class EnvironmentCommand(BaseDiffusersCLICommand):
 
         platform_info = platform.platform()
 
-        is_notebook_str = "Yes" if is_notebook() else "No"
-
         is_google_colab_str = "Yes" if is_google_colab() else "No"
 
         accelerator = "NA"
@@ -123,7 +120,7 @@ class EnvironmentCommand(BaseDiffusersCLICommand):
                 out_str = out_str.decode("utf-8")
 
                 if len(out_str) > 0:
-                    accelerator = out_str.strip()
+                    accelerator = out_str.strip()
             except FileNotFoundError:
                 pass
         elif platform.system() == "Darwin":  # Mac OS
@@ -155,7 +152,6 @@ class EnvironmentCommand(BaseDiffusersCLICommand):
         info = {
             "🤗 Diffusers version": version,
             "Platform": platform_info,
-            "Running on a notebook?": is_notebook_str,
             "Running on Google Colab?": is_google_colab_str,
             "Python version": platform.python_version(),
             "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
diffusers/configuration_utils.py
CHANGED
@@ -23,7 +23,7 @@ import json
 import os
 import re
 from collections import OrderedDict
-from pathlib import PosixPath
+from pathlib import Path
 from typing import Any, Dict, Tuple, Union
 
 import numpy as np
@@ -310,9 +310,6 @@ class ConfigMixin:
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download:
-                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
-                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -343,7 +340,6 @@ class ConfigMixin:
         local_dir = kwargs.pop("local_dir", None)
         local_dir_use_symlinks = kwargs.pop("local_dir_use_symlinks", "auto")
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)
         local_files_only = kwargs.pop("local_files_only", False)
@@ -386,7 +382,6 @@ class ConfigMixin:
                 cache_dir=cache_dir,
                 force_download=force_download,
                 proxies=proxies,
-                resume_download=resume_download,
                 local_files_only=local_files_only,
                 token=token,
                 user_agent=user_agent,
@@ -587,8 +582,8 @@ class ConfigMixin:
         def to_json_saveable(value):
             if isinstance(value, np.ndarray):
                 value = value.tolist()
-            elif isinstance(value, PosixPath):
-                value = str(value)
+            elif isinstance(value, Path):
+                value = value.as_posix()
             return value
 
         config_dict = {k: to_json_saveable(v) for k, v in config_dict.items()}
@@ -716,7 +711,7 @@ class LegacyConfigMixin(ConfigMixin):
 
     @classmethod
     def from_config(cls, config: Union[FrozenDict, Dict[str, Any]] = None, return_unused_kwargs=False, **kwargs):
-        # To prevent
+        # To prevent dependency import problem.
         from .models.model_loading_utils import _fetch_remapped_cls_from_config
 
         # resolve remapping
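The `to_json_saveable` hunk above switches the path handling to `pathlib.Path` plus `as_posix()`, so path-valued config entries are written to JSON with forward slashes on every platform. A small standalone sketch of that conversion; the helper name mirrors the one in the diff, while the sample path is made up for illustration.

```python
from pathlib import Path

import numpy as np


def to_json_saveable(value):
    # Mirrors the updated conversion: arrays become lists, Path objects become
    # forward-slash strings so the resulting JSON is platform independent.
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, Path):
        return value.as_posix()
    return value


print(to_json_saveable(Path("checkpoints") / "unet"))  # -> "checkpoints/unet"
```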
diffusers/dependency_versions_table.py
CHANGED
@@ -3,7 +3,7 @@
 # 2. run `make deps_table_update`
 deps = {
     "Pillow": "Pillow",
-    "accelerate": "accelerate>=0.
+    "accelerate": "accelerate>=0.31.0",
     "compel": "compel==0.1.8",
     "datasets": "datasets",
     "filelock": "filelock",
@@ -40,7 +40,7 @@ deps = {
     "tensorboard": "tensorboard",
     "torch": "torch>=1.4",
     "torchvision": "torchvision",
-    "transformers": "transformers>=4.
+    "transformers": "transformers>=4.41.2",
     "urllib3": "urllib3<=2.0.0",
     "black": "black",
 }
diffusers/image_processor.py
CHANGED
@@ -569,7 +569,7 @@ class VaeImageProcessor(ConfigMixin):
 
             channel = image.shape[1]
             # don't need any preprocess if the image is latents
-            if channel == 4:
+            if channel == self.vae_latent_channels:
                 return image
 
         height, width = self.get_default_height_width(image, height, width)
@@ -585,7 +585,6 @@ class VaeImageProcessor(ConfigMixin):
                 FutureWarning,
             )
             do_normalize = False
-
         if do_normalize:
             image = self.normalize(image)
 
diffusers/loaders/__init__.py
CHANGED
@@ -55,11 +55,19 @@ _import_structure = {}
 
 if is_torch_available():
     _import_structure["single_file_model"] = ["FromOriginalModelMixin"]
+
     _import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
     _import_structure["utils"] = ["AttnProcsLayers"]
     if is_transformers_available():
         _import_structure["single_file"] = ["FromSingleFileMixin"]
-        _import_structure["
+        _import_structure["lora_pipeline"] = [
+            "AmusedLoraLoaderMixin",
+            "StableDiffusionLoraLoaderMixin",
+            "SD3LoraLoaderMixin",
+            "StableDiffusionXLLoraLoaderMixin",
+            "LoraLoaderMixin",
+            "FluxLoraLoaderMixin",
+        ]
         _import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"]
         _import_structure["ip_adapter"] = ["IPAdapterMixin"]
 
@@ -74,7 +82,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
 
         if is_transformers_available():
            from .ip_adapter import IPAdapterMixin
-            from .
+            from .lora_pipeline import (
+                AmusedLoraLoaderMixin,
+                FluxLoraLoaderMixin,
+                LoraLoaderMixin,
+                SD3LoraLoaderMixin,
+                StableDiffusionLoraLoaderMixin,
+                StableDiffusionXLLoraLoaderMixin,
+            )
            from .single_file import FromSingleFileMixin
            from .textual_inversion import TextualInversionLoaderMixin
 
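With this refactor the pipeline-level LoRA mixins (`StableDiffusionLoraLoaderMixin`, `StableDiffusionXLLoraLoaderMixin`, `SD3LoraLoaderMixin`, `FluxLoraLoaderMixin`, and the legacy `LoraLoaderMixin`) are served from the new `lora_pipeline.py` module, while the monolithic `loaders/lora.py` is removed (see the file list above). A hedged sketch of the load/unload API these mixins back; the repository ids and adapter name below are placeholders, not values taken from this diff.

```python
# Hedged sketch: pipeline-level LoRA loading now implemented in
# diffusers/loaders/lora_pipeline.py (requires peft to be installed).
import torch

from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",  # assumed base checkpoint
    torch_dtype=torch.float16,
).to("cuda")

# Both arguments below are placeholders for a real LoRA repository and adapter name.
pipe.load_lora_weights("some-user/some-sdxl-lora", adapter_name="example")
image = pipe("a watercolor painting of a lighthouse").images[0]

pipe.unload_lora_weights()  # restores the original, LoRA-free weights
```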
diffusers/loaders/ip_adapter.py
CHANGED
@@ -90,9 +90,7 @@ class IPAdapterMixin:
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download:
-                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
-                of Diffusers.
+
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -135,7 +133,6 @@ class IPAdapterMixin:
         # Load the main state dict first.
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", None)
         token = kwargs.pop("token", None)
@@ -171,7 +168,6 @@ class IPAdapterMixin:
                     weights_name=weight_name,
                     cache_dir=cache_dir,
                     force_download=force_download,
-                    resume_download=resume_download,
                     proxies=proxies,
                     local_files_only=local_files_only,
                     token=token,
@@ -226,7 +222,8 @@ class IPAdapterMixin:
 
         # create feature extractor if it has not been registered to the pipeline yet
         if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is None:
-            feature_extractor = CLIPImageProcessor()
+            clip_image_size = self.image_encoder.config.image_size
+            feature_extractor = CLIPImageProcessor(size=clip_image_size, crop_size=clip_image_size)
             self.register_modules(feature_extractor=feature_extractor)
 
         # load ip-adapter into unet
@@ -323,7 +320,13 @@ class IPAdapterMixin:
 
         # remove hidden encoder
         self.unet.encoder_hid_proj = None
-        self.config.encoder_hid_dim_type = None
+        self.unet.config.encoder_hid_dim_type = None
+
+        # Kolors: restore `encoder_hid_proj` with `text_encoder_hid_proj`
+        if hasattr(self.unet, "text_encoder_hid_proj") and self.unet.text_encoder_hid_proj is not None:
+            self.unet.encoder_hid_proj = self.unet.text_encoder_hid_proj
+            self.unet.text_encoder_hid_proj = None
+            self.unet.config.encoder_hid_dim_type = "text_proj"
 
         # restore original Unet attention processors layers
         attn_procs = {}
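The `unload_ip_adapter` changes above reset `encoder_hid_dim_type` on the UNet config (rather than the pipeline config) and restore the Kolors-specific `text_encoder_hid_proj` projection. A hedged sketch of the load/scale/unload round trip that method supports; the adapter repository, subfolder, weight file, and the stand-in reference image are assumed examples, not values from this diff.

```python
# Hedged sketch: IP-Adapter load/scale/unload round trip on a SD 1.5 pipeline.
# The repo id, subfolder, and weight file below are assumed examples.
import torch
from PIL import Image

from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
pipe.set_ip_adapter_scale(0.6)

style_image = Image.new("RGB", (512, 512), color="white")  # stand-in for a real reference image
image = pipe("a cat, best quality", ip_adapter_image=style_image).images[0]

# Dropping the adapter also restores the UNet config fields touched in the diff above.
pipe.unload_ip_adapter()
```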