diffusers 0.30.3__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +97 -4
- diffusers/callbacks.py +56 -3
- diffusers/configuration_utils.py +13 -1
- diffusers/image_processor.py +282 -71
- diffusers/loaders/__init__.py +24 -3
- diffusers/loaders/ip_adapter.py +543 -16
- diffusers/loaders/lora_base.py +138 -125
- diffusers/loaders/lora_conversion_utils.py +647 -0
- diffusers/loaders/lora_pipeline.py +2216 -230
- diffusers/loaders/peft.py +380 -0
- diffusers/loaders/single_file_model.py +71 -4
- diffusers/loaders/single_file_utils.py +597 -10
- diffusers/loaders/textual_inversion.py +5 -3
- diffusers/loaders/transformer_flux.py +181 -0
- diffusers/loaders/transformer_sd3.py +89 -0
- diffusers/loaders/unet.py +56 -12
- diffusers/models/__init__.py +49 -12
- diffusers/models/activations.py +22 -9
- diffusers/models/adapter.py +53 -53
- diffusers/models/attention.py +98 -13
- diffusers/models/attention_flax.py +1 -1
- diffusers/models/attention_processor.py +2160 -346
- diffusers/models/autoencoders/__init__.py +5 -0
- diffusers/models/autoencoders/autoencoder_dc.py +620 -0
- diffusers/models/autoencoders/autoencoder_kl.py +73 -12
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +213 -105
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
- diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
- diffusers/models/autoencoders/vae.py +18 -5
- diffusers/models/controlnet.py +47 -802
- diffusers/models/controlnet_flux.py +70 -0
- diffusers/models/controlnet_sd3.py +26 -376
- diffusers/models/controlnet_sparsectrl.py +46 -719
- diffusers/models/controlnets/__init__.py +23 -0
- diffusers/models/controlnets/controlnet.py +872 -0
- diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
- diffusers/models/controlnets/controlnet_flux.py +536 -0
- diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
- diffusers/models/controlnets/controlnet_sd3.py +489 -0
- diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
- diffusers/models/controlnets/controlnet_union.py +832 -0
- diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
- diffusers/models/controlnets/multicontrolnet.py +183 -0
- diffusers/models/embeddings.py +996 -92
- diffusers/models/embeddings_flax.py +23 -9
- diffusers/models/model_loading_utils.py +264 -14
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +334 -51
- diffusers/models/normalization.py +157 -13
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +3 -2
- diffusers/models/transformers/cogvideox_transformer_3d.py +69 -13
- diffusers/models/transformers/dit_transformer_2d.py +1 -1
- diffusers/models/transformers/latte_transformer_3d.py +4 -4
- diffusers/models/transformers/pixart_transformer_2d.py +10 -2
- diffusers/models/transformers/sana_transformer.py +488 -0
- diffusers/models/transformers/stable_audio_transformer.py +1 -1
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +422 -0
- diffusers/models/transformers/transformer_cogview3plus.py +386 -0
- diffusers/models/transformers/transformer_flux.py +189 -51
- diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
- diffusers/models/transformers/transformer_ltx.py +469 -0
- diffusers/models/transformers/transformer_mochi.py +499 -0
- diffusers/models/transformers/transformer_sd3.py +112 -18
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +8 -1
- diffusers/models/unets/unet_2d_blocks.py +88 -21
- diffusers/models/unets/unet_2d_condition.py +9 -9
- diffusers/models/unets/unet_3d_blocks.py +9 -7
- diffusers/models/unets/unet_motion_model.py +46 -68
- diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
- diffusers/models/unets/unet_stable_cascade.py +2 -2
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +14 -6
- diffusers/pipelines/__init__.py +69 -6
- diffusers/pipelines/allegro/__init__.py +48 -0
- diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
- diffusers/pipelines/allegro/pipeline_output.py +23 -0
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +52 -22
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +3 -1
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -72
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +2 -9
- diffusers/pipelines/auto_pipeline.py +88 -10
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/cogvideo/__init__.py +2 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +80 -39
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +108 -50
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +89 -50
- diffusers/pipelines/cogview3/__init__.py +47 -0
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
- diffusers/pipelines/cogview3/pipeline_output.py +21 -0
- diffusers/pipelines/controlnet/__init__.py +86 -80
- diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +9 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +9 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +37 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +12 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +9 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +22 -4
- diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +56 -20
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +32 -9
- diffusers/pipelines/flux/__init__.py +23 -1
- diffusers/pipelines/flux/modeling_flux.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +256 -48
- diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
- diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
- diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
- diffusers/pipelines/flux/pipeline_output.py +16 -0
- diffusers/pipelines/free_noise_utils.py +365 -5
- diffusers/pipelines/hunyuan_video/__init__.py +48 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
- diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +20 -4
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
- diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
- diffusers/pipelines/kolors/text_encoder.py +2 -2
- diffusers/pipelines/kolors/tokenizer.py +4 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latte/pipeline_latte.py +2 -2
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
- diffusers/pipelines/ltx/__init__.py +50 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
- diffusers/pipelines/ltx/pipeline_output.py +20 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +3 -10
- diffusers/pipelines/mochi/__init__.py +48 -0
- diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
- diffusers/pipelines/mochi/pipeline_output.py +20 -0
- diffusers/pipelines/pag/__init__.py +13 -0
- diffusers/pipelines/pag/pag_utils.py +8 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +2 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +22 -6
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +7 -14
- diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
- diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +18 -9
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
- diffusers/pipelines/pia/pipeline_pia.py +2 -0
- diffusers/pipelines/pipeline_flax_utils.py +1 -1
- diffusers/pipelines/pipeline_loading_utils.py +250 -31
- diffusers/pipelines/pipeline_utils.py +158 -186
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +7 -14
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +7 -14
- diffusers/pipelines/sana/__init__.py +47 -0
- diffusers/pipelines/sana/pipeline_output.py +21 -0
- diffusers/pipelines/sana/pipeline_sana.py +884 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +46 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +228 -23
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +82 -13
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +60 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -22
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -22
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
- diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/quantizers/__init__.py +16 -0
- diffusers/quantizers/auto.py +139 -0
- diffusers/quantizers/base.py +233 -0
- diffusers/quantizers/bitsandbytes/__init__.py +2 -0
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
- diffusers/quantizers/bitsandbytes/utils.py +306 -0
- diffusers/quantizers/gguf/__init__.py +1 -0
- diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
- diffusers/quantizers/gguf/utils.py +456 -0
- diffusers/quantizers/quantization_config.py +669 -0
- diffusers/quantizers/torchao/__init__.py +15 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
- diffusers/schedulers/scheduling_ddim.py +4 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
- diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
- diffusers/schedulers/scheduling_ddpm.py +6 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +6 -7
- diffusers/schedulers/scheduling_deis_multistep.py +102 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +113 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +111 -5
- diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +126 -7
- diffusers/schedulers/scheduling_edm_euler.py +8 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
- diffusers/schedulers/scheduling_euler_discrete.py +92 -7
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
- diffusers/schedulers/scheduling_heun_discrete.py +114 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
- diffusers/schedulers/scheduling_lcm.py +2 -6
- diffusers/schedulers/scheduling_lms_discrete.py +76 -1
- diffusers/schedulers/scheduling_repaint.py +1 -1
- diffusers/schedulers/scheduling_sasolver.py +102 -6
- diffusers/schedulers/scheduling_tcd.py +2 -6
- diffusers/schedulers/scheduling_unclip.py +4 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +127 -5
- diffusers/training_utils.py +63 -19
- diffusers/utils/__init__.py +7 -1
- diffusers/utils/constants.py +1 -0
- diffusers/utils/dummy_pt_objects.py +240 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +435 -0
- diffusers/utils/dynamic_modules_utils.py +3 -3
- diffusers/utils/hub_utils.py +44 -40
- diffusers/utils/import_utils.py +98 -8
- diffusers/utils/loading_utils.py +28 -4
- diffusers/utils/peft_utils.py +6 -3
- diffusers/utils/testing_utils.py +115 -1
- diffusers/utils/torch_utils.py +3 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/METADATA +73 -72
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/RECORD +268 -193
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
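The biggest structural additions in this release are the new `diffusers/quantizers/` subpackage (bitsandbytes, GGUF, and torchao backends), the `diffusers/models/controlnets/` reorganization, and a batch of new pipelines (Allegro, CogView3, the Flux variants, HunyuanVideo, LTX, Mochi, Sana). As a rough, hedged sketch of what the quantizer files are for, the snippet below follows the released 0.32.0 quantization API as documented upstream; the exact classes, arguments, and checkpoint name are assumptions, not anything shown in this listing.

```python
# Hedged sketch: 4-bit model loading via the new diffusers/quantizers backends.
# Assumes diffusers==0.32.0 with bitsandbytes installed; the repo id is illustrative only.
import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel

nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

transformer = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3.5-large",  # example repo; requires access and a download
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
)
```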
diffusers/image_processor.py
CHANGED
```diff
@@ -38,16 +38,44 @@ PipelineImageInput = Union[
 PipelineDepthInput = PipelineImageInput
 
 
-def is_valid_image(image):
+def is_valid_image(image) -> bool:
+    r"""
+    Checks if the input is a valid image.
+
+    A valid image can be:
+    - A `PIL.Image.Image`.
+    - A 2D or 3D `np.ndarray` or `torch.Tensor` (grayscale or color image).
+
+    Args:
+        image (`Union[PIL.Image.Image, np.ndarray, torch.Tensor]`):
+            The image to validate. It can be a PIL image, a NumPy array, or a torch tensor.
+
+    Returns:
+        `bool`:
+            `True` if the input is a valid image, `False` otherwise.
+    """
     return isinstance(image, PIL.Image.Image) or isinstance(image, (np.ndarray, torch.Tensor)) and image.ndim in (2, 3)
 
 
 def is_valid_image_imagelist(images):
+    r"""
+    Checks if the input is a valid image or list of images.
+
+    The input can be one of the following formats:
+    - A 4D tensor or numpy array (batch of images).
+    - A valid single image: `PIL.Image.Image`, 2D `np.ndarray` or `torch.Tensor` (grayscale image), 3D `np.ndarray` or
+      `torch.Tensor`.
+    - A list of valid images.
+
+    Args:
+        images (`Union[np.ndarray, torch.Tensor, PIL.Image.Image, List]`):
+            The image(s) to check. Can be a batch of images (4D tensor/array), a single image, or a list of valid
+            images.
+
+    Returns:
+        `bool`:
+            `True` if the input is valid, `False` otherwise.
+    """
     if isinstance(images, (np.ndarray, torch.Tensor)) and images.ndim == 4:
         return True
     elif is_valid_image(images):
@@ -103,8 +131,16 @@ class VaeImageProcessor(ConfigMixin):
 
     @staticmethod
     def numpy_to_pil(images: np.ndarray) -> List[PIL.Image.Image]:
-        """
+        r"""
         Convert a numpy image or a batch of images to a PIL image.
+
+        Args:
+            images (`np.ndarray`):
+                The image array to convert to PIL format.
+
+        Returns:
+            `List[PIL.Image.Image]`:
+                A list of PIL images.
         """
         if images.ndim == 3:
             images = images[None, ...]
@@ -119,8 +155,16 @@ class VaeImageProcessor(ConfigMixin):
 
     @staticmethod
     def pil_to_numpy(images: Union[List[PIL.Image.Image], PIL.Image.Image]) -> np.ndarray:
-        """
+        r"""
         Convert a PIL image or a list of PIL images to NumPy arrays.
+
+        Args:
+            images (`PIL.Image.Image` or `List[PIL.Image.Image]`):
+                The PIL image or list of images to convert to NumPy format.
+
+        Returns:
+            `np.ndarray`:
+                A NumPy array representation of the images.
         """
         if not isinstance(images, list):
             images = [images]
@@ -131,8 +175,16 @@ class VaeImageProcessor(ConfigMixin):
 
     @staticmethod
     def numpy_to_pt(images: np.ndarray) -> torch.Tensor:
-        """
+        r"""
         Convert a NumPy image to a PyTorch tensor.
+
+        Args:
+            images (`np.ndarray`):
+                The NumPy image array to convert to PyTorch format.
+
+        Returns:
+            `torch.Tensor`:
+                A PyTorch tensor representation of the images.
         """
         if images.ndim == 3:
             images = images[..., None]
@@ -142,30 +194,62 @@ class VaeImageProcessor(ConfigMixin):
 
     @staticmethod
     def pt_to_numpy(images: torch.Tensor) -> np.ndarray:
-        """
+        r"""
         Convert a PyTorch tensor to a NumPy image.
+
+        Args:
+            images (`torch.Tensor`):
+                The PyTorch tensor to convert to NumPy format.
+
+        Returns:
+            `np.ndarray`:
+                A NumPy array representation of the images.
         """
         images = images.cpu().permute(0, 2, 3, 1).float().numpy()
         return images
 
     @staticmethod
     def normalize(images: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]:
-        """
+        r"""
         Normalize an image array to [-1,1].
+
+        Args:
+            images (`np.ndarray` or `torch.Tensor`):
+                The image array to normalize.
+
+        Returns:
+            `np.ndarray` or `torch.Tensor`:
+                The normalized image array.
         """
         return 2.0 * images - 1.0
 
     @staticmethod
     def denormalize(images: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]:
-        """
+        r"""
         Denormalize an image array to [0,1].
+
+        Args:
+            images (`np.ndarray` or `torch.Tensor`):
+                The image array to denormalize.
+
+        Returns:
+            `np.ndarray` or `torch.Tensor`:
+                The denormalized image array.
         """
-        return (images / 2 + 0.5).clamp(0, 1)
+        return (images * 0.5 + 0.5).clamp(0, 1)
 
     @staticmethod
     def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
-        """
+        r"""
         Converts a PIL image to RGB format.
+
+        Args:
+            image (`PIL.Image.Image`):
+                The PIL image to convert to RGB.
+
+        Returns:
+            `PIL.Image.Image`:
+                The RGB-converted PIL image.
         """
         image = image.convert("RGB")
 
@@ -173,8 +257,16 @@ class VaeImageProcessor(ConfigMixin):
 
     @staticmethod
     def convert_to_grayscale(image: PIL.Image.Image) -> PIL.Image.Image:
-        """
-        Converts a PIL image to grayscale
+        r"""
+        Converts a given PIL image to grayscale.
+
+        Args:
+            image (`PIL.Image.Image`):
+                The input image to convert.
+
+        Returns:
+            `PIL.Image.Image`:
+                The image converted to grayscale.
         """
         image = image.convert("L")
 
@@ -182,8 +274,16 @@ class VaeImageProcessor(ConfigMixin):
 
     @staticmethod
     def blur(image: PIL.Image.Image, blur_factor: int = 4) -> PIL.Image.Image:
-        """
+        r"""
         Applies Gaussian blur to an image.
+
+        Args:
+            image (`PIL.Image.Image`):
+                The PIL image to convert to grayscale.
+
+        Returns:
+            `PIL.Image.Image`:
+                The grayscale-converted PIL image.
         """
         image = image.filter(ImageFilter.GaussianBlur(blur_factor))
 
@@ -191,7 +291,7 @@ class VaeImageProcessor(ConfigMixin):
 
     @staticmethod
     def get_crop_region(mask_image: PIL.Image.Image, width: int, height: int, pad=0):
-        """
+        r"""
         Finds a rectangular region that contains all masked ares in an image, and expands region to match the aspect
         ratio of the original image; for example, if user drew mask in a 128x32 region, and the dimensions for
         processing are 512x512, the region will be expanded to 128x128.
@@ -285,14 +385,21 @@ class VaeImageProcessor(ConfigMixin):
         width: int,
         height: int,
     ) -> PIL.Image.Image:
-        """
+        r"""
         Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center
         the image within the dimensions, filling empty with data from image.
 
         Args:
-            image
+            image (`PIL.Image.Image`):
+                The image to resize and fill.
+            width (`int`):
+                The width to resize the image to.
+            height (`int`):
+                The height to resize the image to.
+
+        Returns:
+            `PIL.Image.Image`:
+                The resized and filled image.
         """
 
         ratio = width / height
@@ -330,14 +437,21 @@ class VaeImageProcessor(ConfigMixin):
         width: int,
         height: int,
     ) -> PIL.Image.Image:
-        """
+        r"""
         Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center
         the image within the dimensions, cropping the excess.
 
         Args:
-            image
+            image (`PIL.Image.Image`):
+                The image to resize and crop.
+            width (`int`):
+                The width to resize the image to.
+            height (`int`):
+                The height to resize the image to.
+
+        Returns:
+            `PIL.Image.Image`:
+                The resized and cropped image.
         """
         ratio = width / height
         src_ratio = image.width / image.height
@@ -423,25 +537,49 @@ class VaeImageProcessor(ConfigMixin):
 
         return image
 
+    def _denormalize_conditionally(
+        self, images: torch.Tensor, do_denormalize: Optional[List[bool]] = None
+    ) -> torch.Tensor:
+        r"""
+        Denormalize a batch of images based on a condition list.
+
+        Args:
+            images (`torch.Tensor`):
+                The input image tensor.
+            do_denormalize (`Optional[List[bool]`, *optional*, defaults to `None`):
+                A list of booleans indicating whether to denormalize each image in the batch. If `None`, will use the
+                value of `do_normalize` in the `VaeImageProcessor` config.
+        """
+        if do_denormalize is None:
+            return self.denormalize(images) if self.config.do_normalize else images
+
+        return torch.stack(
+            [self.denormalize(images[i]) if do_denormalize[i] else images[i] for i in range(images.shape[0])]
+        )
+
     def get_default_height_width(
         self,
         image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
         height: Optional[int] = None,
         width: Optional[int] = None,
     ) -> Tuple[int, int]:
-        """
-        This function return the height and width that are downscaled to the next integer multiple of
-        `vae_scale_factor`.
+        r"""
+        Returns the height and width of the image, downscaled to the next integer multiple of `vae_scale_factor`.
 
         Args:
-            image(`PIL.Image.Image
-                The image input, can be a PIL image,
-                shape `[batch, height, width]` or `[batch, height, width,
-                have shape `[batch,
-            height (`int`, *optional*, defaults to `None`):
-                The height
-            width (`int`, *optional
-                The width
+            image (`Union[PIL.Image.Image, np.ndarray, torch.Tensor]`):
+                The image input, which can be a PIL image, NumPy array, or PyTorch tensor. If it is a NumPy array, it
+                should have shape `[batch, height, width]` or `[batch, height, width, channels]`. If it is a PyTorch
+                tensor, it should have shape `[batch, channels, height, width]`.
+            height (`Optional[int]`, *optional*, defaults to `None`):
+                The height of the preprocessed image. If `None`, the height of the `image` input will be used.
+            width (`Optional[int]`, *optional*, defaults to `None`):
+                The width of the preprocessed image. If `None`, the width of the `image` input will be used.
+
+        Returns:
+            `Tuple[int, int]`:
+                A tuple containing the height and width, both resized to the nearest integer multiple of
+                `vae_scale_factor`.
         """
 
         if height is None:
@@ -478,13 +616,13 @@ class VaeImageProcessor(ConfigMixin):
         Preprocess the image input.
 
         Args:
-            image (`
+            image (`PipelineImageInput`):
                 The image input, accepted formats are PIL images, NumPy arrays, PyTorch tensors; Also accept list of
                 supported formats.
-            height (`int`, *optional
+            height (`int`, *optional*):
                 The height in preprocessed image. If `None`, will use the `get_default_height_width()` to get default
                 height.
-            width (`int`, *optional
+            width (`int`, *optional*):
                 The width in preprocessed. If `None`, will use get_default_height_width()` to get the default width.
             resize_mode (`str`, *optional*, defaults to `default`):
                 The resize mode, can be one of `default` or `fill`. If `default`, will resize the image to fit within
@@ -496,6 +634,10 @@ class VaeImageProcessor(ConfigMixin):
                 supported for PIL image input.
             crops_coords (`List[Tuple[int, int, int, int]]`, *optional*, defaults to `None`):
                 The crop coordinates for each image in the batch. If `None`, will not crop the image.
+
+        Returns:
+            `torch.Tensor`:
+                The preprocessed image.
         """
         supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor)
 
@@ -569,7 +711,7 @@ class VaeImageProcessor(ConfigMixin):
 
             channel = image.shape[1]
             # don't need any preprocess if the image is latents
-            if channel == self.vae_latent_channels:
+            if channel == self.config.vae_latent_channels:
                 return image
 
             height, width = self.get_default_height_width(image, height, width)
@@ -630,12 +772,7 @@ class VaeImageProcessor(ConfigMixin):
         if output_type == "latent":
             return image
 
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
-        )
+        image = self._denormalize_conditionally(image, do_denormalize)
 
         if output_type == "pt":
             return image
@@ -655,17 +792,29 @@ class VaeImageProcessor(ConfigMixin):
         image: PIL.Image.Image,
         crop_coords: Optional[Tuple[int, int, int, int]] = None,
     ) -> PIL.Image.Image:
-        """
-        overlay the
-        """
+        r"""
+        Applies an overlay of the mask and the inpainted image on the original image.
 
+        Args:
+            mask (`PIL.Image.Image`):
+                The mask image that highlights regions to overlay.
+            init_image (`PIL.Image.Image`):
+                The original image to which the overlay is applied.
+            image (`PIL.Image.Image`):
+                The image to overlay onto the original.
+            crop_coords (`Tuple[int, int, int, int]`, *optional*):
+                Coordinates to crop the image. If provided, the image will be cropped accordingly.
+
+        Returns:
+            `PIL.Image.Image`:
+                The final image with the overlay applied.
+        """
 
-        mask = self.resize(mask, width=width, height=height)
+        width, height = init_image.width, init_image.height
 
         init_image_masked = PIL.Image.new("RGBa", (width, height))
         init_image_masked.paste(init_image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(mask.convert("L")))
+
         init_image_masked = init_image_masked.convert("RGBA")
 
         if crop_coords is not None:
@@ -713,8 +862,16 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
 
     @staticmethod
     def numpy_to_pil(images: np.ndarray) -> List[PIL.Image.Image]:
-        """
-        Convert a NumPy image or a batch of images to a PIL
+        r"""
+        Convert a NumPy image or a batch of images to a list of PIL images.
+
+        Args:
+            images (`np.ndarray`):
+                The input NumPy array of images, which can be a single image or a batch.
+
+        Returns:
+            `List[PIL.Image.Image]`:
+                A list of PIL images converted from the input NumPy array.
         """
         if images.ndim == 3:
             images = images[None, ...]
@@ -729,8 +886,16 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
 
     @staticmethod
     def depth_pil_to_numpy(images: Union[List[PIL.Image.Image], PIL.Image.Image]) -> np.ndarray:
-        """
+        r"""
         Convert a PIL image or a list of PIL images to NumPy arrays.
+
+        Args:
+            images (`Union[List[PIL.Image.Image], PIL.Image.Image]`):
+                The input image or list of images to be converted.
+
+        Returns:
+            `np.ndarray`:
+                A NumPy array of the converted images.
         """
         if not isinstance(images, list):
             images = [images]
@@ -741,18 +906,30 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
 
     @staticmethod
     def rgblike_to_depthmap(image: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]:
-        """
-            image: RGB-like depth image
+        r"""
+        Convert an RGB-like depth image to a depth map.
 
+        Args:
+            image (`Union[np.ndarray, torch.Tensor]`):
+                The RGB-like depth image to convert.
 
+        Returns:
+            `Union[np.ndarray, torch.Tensor]`:
+                The corresponding depth map.
         """
         return image[:, :, 1] * 2**8 + image[:, :, 2]
 
     def numpy_to_depth(self, images: np.ndarray) -> List[PIL.Image.Image]:
-        """
-        Convert a NumPy depth image or a batch of images to a PIL
+        r"""
+        Convert a NumPy depth image or a batch of images to a list of PIL images.
+
+        Args:
+            images (`np.ndarray`):
+                The input NumPy array of depth images, which can be a single image or a batch.
+
+        Returns:
+            `List[PIL.Image.Image]`:
+                A list of PIL images converted from the input NumPy depth images.
         """
         if images.ndim == 3:
             images = images[None, ...]
@@ -804,12 +981,7 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
             deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False)
             output_type = "np"
 
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
-        )
+        image = self._denormalize_conditionally(image, do_denormalize)
 
         image = self.pt_to_numpy(image)
 
@@ -833,8 +1005,24 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
         width: Optional[int] = None,
         target_res: Optional[int] = None,
     ) -> torch.Tensor:
-        """
-        Preprocess the image input. Accepted formats are PIL images, NumPy arrays or PyTorch tensors.
+        r"""
+        Preprocess the image input. Accepted formats are PIL images, NumPy arrays, or PyTorch tensors.
+
+        Args:
+            rgb (`Union[torch.Tensor, PIL.Image.Image, np.ndarray]`):
+                The RGB input image, which can be a single image or a batch.
+            depth (`Union[torch.Tensor, PIL.Image.Image, np.ndarray]`):
+                The depth input image, which can be a single image or a batch.
+            height (`Optional[int]`, *optional*, defaults to `None`):
+                The desired height of the processed image. If `None`, defaults to the height of the input image.
+            width (`Optional[int]`, *optional*, defaults to `None`):
+                The desired width of the processed image. If `None`, defaults to the width of the input image.
+            target_res (`Optional[int]`, *optional*, defaults to `None`):
+                Target resolution for resizing the images. If specified, overrides height and width.
+
+        Returns:
+            `Tuple[torch.Tensor, torch.Tensor]`:
+                A tuple containing the processed RGB and depth images as PyTorch tensors.
         """
         supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor)
 
@@ -1072,7 +1260,17 @@ class PixArtImageProcessor(VaeImageProcessor):
 
     @staticmethod
     def classify_height_width_bin(height: int, width: int, ratios: dict) -> Tuple[int, int]:
-        """
+        r"""
+        Returns the binned height and width based on the aspect ratio.
+
+        Args:
+            height (`int`): The height of the image.
+            width (`int`): The width of the image.
+            ratios (`dict`): A dictionary where keys are aspect ratios and values are tuples of (height, width).
+
+        Returns:
+            `Tuple[int, int]`: The closest binned height and width.
+        """
         ar = float(height / width)
         closest_ratio = min(ratios.keys(), key=lambda ratio: abs(float(ratio) - ar))
         default_hw = ratios[closest_ratio]
@@ -1080,6 +1278,19 @@ class PixArtImageProcessor(VaeImageProcessor):
 
     @staticmethod
     def resize_and_crop_tensor(samples: torch.Tensor, new_width: int, new_height: int) -> torch.Tensor:
+        r"""
+        Resizes and crops a tensor of images to the specified dimensions.
+
+        Args:
+            samples (`torch.Tensor`):
+                A tensor of shape (N, C, H, W) where N is the batch size, C is the number of channels, H is the height,
+                and W is the width.
+            new_width (`int`): The desired width of the output images.
+            new_height (`int`): The desired height of the output images.
+
+        Returns:
+            `torch.Tensor`: A tensor containing the resized and cropped images.
+        """
         orig_height, orig_width = samples.shape[2], samples.shape[3]
 
         # Check if resizing is needed
```
diffusers/loaders/__init__.py
CHANGED
```diff
@@ -55,7 +55,8 @@ _import_structure = {}
 
 if is_torch_available():
     _import_structure["single_file_model"] = ["FromOriginalModelMixin"]
-
+    _import_structure["transformer_flux"] = ["FluxTransformer2DLoadersMixin"]
+    _import_structure["transformer_sd3"] = ["SD3Transformer2DLoadersMixin"]
     _import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
     _import_structure["utils"] = ["AttnProcsLayers"]
     if is_transformers_available():
@@ -65,11 +66,20 @@ if is_torch_available():
             "StableDiffusionLoraLoaderMixin",
             "SD3LoraLoaderMixin",
             "StableDiffusionXLLoraLoaderMixin",
+            "LTXVideoLoraLoaderMixin",
             "LoraLoaderMixin",
             "FluxLoraLoaderMixin",
+            "CogVideoXLoraLoaderMixin",
+            "Mochi1LoraLoaderMixin",
+            "HunyuanVideoLoraLoaderMixin",
+            "SanaLoraLoaderMixin",
         ]
         _import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"]
-        _import_structure["ip_adapter"] = ["IPAdapterMixin"]
+        _import_structure["ip_adapter"] = [
+            "IPAdapterMixin",
+            "FluxIPAdapterMixin",
+            "SD3IPAdapterMixin",
+        ]
 
 _import_structure["peft"] = ["PeftAdapterMixin"]
 
@@ -77,15 +87,26 @@ _import_structure["peft"] = ["PeftAdapterMixin"]
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     if is_torch_available():
         from .single_file_model import FromOriginalModelMixin
+        from .transformer_flux import FluxTransformer2DLoadersMixin
+        from .transformer_sd3 import SD3Transformer2DLoadersMixin
         from .unet import UNet2DConditionLoadersMixin
         from .utils import AttnProcsLayers
 
         if is_transformers_available():
-            from .ip_adapter import IPAdapterMixin
+            from .ip_adapter import (
+                FluxIPAdapterMixin,
+                IPAdapterMixin,
+                SD3IPAdapterMixin,
+            )
             from .lora_pipeline import (
                 AmusedLoraLoaderMixin,
+                CogVideoXLoraLoaderMixin,
                 FluxLoraLoaderMixin,
+                HunyuanVideoLoraLoaderMixin,
                 LoraLoaderMixin,
+                LTXVideoLoraLoaderMixin,
+                Mochi1LoraLoaderMixin,
+                SanaLoraLoaderMixin,
                 SD3LoraLoaderMixin,
                 StableDiffusionLoraLoaderMixin,
                 StableDiffusionXLLoraLoaderMixin,
```