diffusers 0.30.2__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +38 -2
- diffusers/configuration_utils.py +12 -0
- diffusers/dependency_versions_table.py +1 -1
- diffusers/image_processor.py +257 -54
- diffusers/loaders/__init__.py +2 -0
- diffusers/loaders/ip_adapter.py +5 -1
- diffusers/loaders/lora_base.py +14 -7
- diffusers/loaders/lora_conversion_utils.py +332 -0
- diffusers/loaders/lora_pipeline.py +707 -41
- diffusers/loaders/peft.py +1 -0
- diffusers/loaders/single_file_utils.py +81 -4
- diffusers/loaders/textual_inversion.py +2 -0
- diffusers/loaders/unet.py +39 -8
- diffusers/models/__init__.py +4 -0
- diffusers/models/adapter.py +53 -53
- diffusers/models/attention.py +86 -10
- diffusers/models/attention_processor.py +169 -133
- diffusers/models/autoencoders/autoencoder_kl.py +71 -11
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +287 -85
- diffusers/models/controlnet_flux.py +536 -0
- diffusers/models/controlnet_sd3.py +7 -3
- diffusers/models/controlnet_sparsectrl.py +0 -1
- diffusers/models/embeddings.py +238 -61
- diffusers/models/embeddings_flax.py +23 -9
- diffusers/models/model_loading_utils.py +182 -14
- diffusers/models/modeling_utils.py +283 -46
- diffusers/models/normalization.py +79 -0
- diffusers/models/transformers/__init__.py +1 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +1 -0
- diffusers/models/transformers/cogvideox_transformer_3d.py +58 -36
- diffusers/models/transformers/pixart_transformer_2d.py +9 -1
- diffusers/models/transformers/transformer_cogview3plus.py +386 -0
- diffusers/models/transformers/transformer_flux.py +161 -44
- diffusers/models/transformers/transformer_sd3.py +7 -1
- diffusers/models/unets/unet_2d_condition.py +8 -8
- diffusers/models/unets/unet_motion_model.py +41 -63
- diffusers/models/upsampling.py +6 -6
- diffusers/pipelines/__init__.py +40 -7
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +44 -20
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -66
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -1
- diffusers/pipelines/auto_pipeline.py +39 -8
- diffusers/pipelines/cogvideo/__init__.py +6 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +32 -34
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +794 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +837 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +825 -0
- diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
- diffusers/pipelines/cogview3/__init__.py +47 -0
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
- diffusers/pipelines/cogview3/pipeline_output.py +21 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +9 -1
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +8 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +8 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +36 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +9 -1
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +8 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +17 -3
- diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +3 -1
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
- diffusers/pipelines/flux/__init__.py +10 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -20
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +984 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +988 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1182 -0
- diffusers/pipelines/flux/pipeline_flux_img2img.py +850 -0
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1015 -0
- diffusers/pipelines/free_noise_utils.py +365 -5
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +15 -3
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
- diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
- diffusers/pipelines/kolors/tokenizer.py +4 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
- diffusers/pipelines/latte/pipeline_latte.py +2 -2
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
- diffusers/pipelines/lumina/pipeline_lumina.py +2 -2
- diffusers/pipelines/pag/__init__.py +6 -0
- diffusers/pipelines/pag/pag_utils.py +8 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1544 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1685 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +17 -5
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +12 -3
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1091 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
- diffusers/pipelines/pia/pipeline_pia.py +2 -0
- diffusers/pipelines/pipeline_loading_utils.py +225 -27
- diffusers/pipelines/pipeline_utils.py +123 -180
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +28 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +12 -3
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +20 -4
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +3 -3
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -14
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -14
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
- diffusers/quantizers/__init__.py +16 -0
- diffusers/quantizers/auto.py +126 -0
- diffusers/quantizers/base.py +233 -0
- diffusers/quantizers/bitsandbytes/__init__.py +2 -0
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +558 -0
- diffusers/quantizers/bitsandbytes/utils.py +306 -0
- diffusers/quantizers/quantization_config.py +391 -0
- diffusers/schedulers/scheduling_ddim.py +4 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
- diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
- diffusers/schedulers/scheduling_ddpm.py +4 -1
- diffusers/schedulers/scheduling_ddpm_parallel.py +4 -1
- diffusers/schedulers/scheduling_deis_multistep.py +78 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +82 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +80 -1
- diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +82 -1
- diffusers/schedulers/scheduling_edm_euler.py +8 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
- diffusers/schedulers/scheduling_euler_discrete.py +92 -7
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
- diffusers/schedulers/scheduling_heun_discrete.py +114 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
- diffusers/schedulers/scheduling_lms_discrete.py +76 -1
- diffusers/schedulers/scheduling_sasolver.py +78 -1
- diffusers/schedulers/scheduling_unclip.py +4 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +78 -1
- diffusers/training_utils.py +48 -18
- diffusers/utils/__init__.py +2 -1
- diffusers/utils/dummy_pt_objects.py +60 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +195 -0
- diffusers/utils/hub_utils.py +16 -4
- diffusers/utils/import_utils.py +31 -8
- diffusers/utils/loading_utils.py +28 -4
- diffusers/utils/peft_utils.py +3 -3
- diffusers/utils/testing_utils.py +59 -0
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/METADATA +7 -6
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/RECORD +173 -147
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/WHEEL +1 -1
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/LICENSE +0 -0
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@ import torch.nn as nn
|
|
18
18
|
|
19
19
|
from ...configuration_utils import ConfigMixin, register_to_config
|
20
20
|
from ...loaders.single_file_model import FromOriginalModelMixin
|
21
|
+
from ...utils import deprecate
|
21
22
|
from ...utils.accelerate_utils import apply_forward_hook
|
22
23
|
from ..attention_processor import (
|
23
24
|
ADDED_KV_ATTENTION_PROCESSORS,
|
@@ -245,6 +246,18 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
|
|
245
246
|
|
246
247
|
self.set_attn_processor(processor)
|
247
248
|
|
249
|
+
def _encode(self, x: torch.Tensor) -> torch.Tensor:
|
250
|
+
batch_size, num_channels, height, width = x.shape
|
251
|
+
|
252
|
+
if self.use_tiling and (width > self.tile_sample_min_size or height > self.tile_sample_min_size):
|
253
|
+
return self._tiled_encode(x)
|
254
|
+
|
255
|
+
enc = self.encoder(x)
|
256
|
+
if self.quant_conv is not None:
|
257
|
+
enc = self.quant_conv(enc)
|
258
|
+
|
259
|
+
return enc
|
260
|
+
|
248
261
|
@apply_forward_hook
|
249
262
|
def encode(
|
250
263
|
self, x: torch.Tensor, return_dict: bool = True
|
@@ -261,21 +274,13 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
|
|
261
274
|
The latent representations of the encoded images. If `return_dict` is True, a
|
262
275
|
[`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain `tuple` is returned.
|
263
276
|
"""
|
264
|
-
if self.use_tiling and (x.shape[-1] > self.tile_sample_min_size or x.shape[-2] > self.tile_sample_min_size):
|
265
|
-
return self.tiled_encode(x, return_dict=return_dict)
|
266
|
-
|
267
277
|
if self.use_slicing and x.shape[0] > 1:
|
268
|
-
encoded_slices = [self.
|
278
|
+
encoded_slices = [self._encode(x_slice) for x_slice in x.split(1)]
|
269
279
|
h = torch.cat(encoded_slices)
|
270
280
|
else:
|
271
|
-
h = self.
|
272
|
-
|
273
|
-
if self.quant_conv is not None:
|
274
|
-
moments = self.quant_conv(h)
|
275
|
-
else:
|
276
|
-
moments = h
|
281
|
+
h = self._encode(x)
|
277
282
|
|
278
|
-
posterior = DiagonalGaussianDistribution(
|
283
|
+
posterior = DiagonalGaussianDistribution(h)
|
279
284
|
|
280
285
|
if not return_dict:
|
281
286
|
return (posterior,)
|
@@ -337,6 +342,54 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
|
|
337
342
|
b[:, :, :, x] = a[:, :, :, -blend_extent + x] * (1 - x / blend_extent) + b[:, :, :, x] * (x / blend_extent)
|
338
343
|
return b
|
339
344
|
|
345
|
+
def _tiled_encode(self, x: torch.Tensor) -> torch.Tensor:
|
346
|
+
r"""Encode a batch of images using a tiled encoder.
|
347
|
+
|
348
|
+
When this option is enabled, the VAE will split the input tensor into tiles to compute encoding in several
|
349
|
+
steps. This is useful to keep memory use constant regardless of image size. The end result of tiled encoding is
|
350
|
+
different from non-tiled encoding because each tile uses a different encoder. To avoid tiling artifacts, the
|
351
|
+
tiles overlap and are blended together to form a smooth output. You may still see tile-sized changes in the
|
352
|
+
output, but they should be much less noticeable.
|
353
|
+
|
354
|
+
Args:
|
355
|
+
x (`torch.Tensor`): Input batch of images.
|
356
|
+
|
357
|
+
Returns:
|
358
|
+
`torch.Tensor`:
|
359
|
+
The latent representation of the encoded videos.
|
360
|
+
"""
|
361
|
+
|
362
|
+
overlap_size = int(self.tile_sample_min_size * (1 - self.tile_overlap_factor))
|
363
|
+
blend_extent = int(self.tile_latent_min_size * self.tile_overlap_factor)
|
364
|
+
row_limit = self.tile_latent_min_size - blend_extent
|
365
|
+
|
366
|
+
# Split the image into 512x512 tiles and encode them separately.
|
367
|
+
rows = []
|
368
|
+
for i in range(0, x.shape[2], overlap_size):
|
369
|
+
row = []
|
370
|
+
for j in range(0, x.shape[3], overlap_size):
|
371
|
+
tile = x[:, :, i : i + self.tile_sample_min_size, j : j + self.tile_sample_min_size]
|
372
|
+
tile = self.encoder(tile)
|
373
|
+
if self.config.use_quant_conv:
|
374
|
+
tile = self.quant_conv(tile)
|
375
|
+
row.append(tile)
|
376
|
+
rows.append(row)
|
377
|
+
result_rows = []
|
378
|
+
for i, row in enumerate(rows):
|
379
|
+
result_row = []
|
380
|
+
for j, tile in enumerate(row):
|
381
|
+
# blend the above tile and the left tile
|
382
|
+
# to the current tile and add the current tile to the result row
|
383
|
+
if i > 0:
|
384
|
+
tile = self.blend_v(rows[i - 1][j], tile, blend_extent)
|
385
|
+
if j > 0:
|
386
|
+
tile = self.blend_h(row[j - 1], tile, blend_extent)
|
387
|
+
result_row.append(tile[:, :, :row_limit, :row_limit])
|
388
|
+
result_rows.append(torch.cat(result_row, dim=3))
|
389
|
+
|
390
|
+
enc = torch.cat(result_rows, dim=2)
|
391
|
+
return enc
|
392
|
+
|
340
393
|
def tiled_encode(self, x: torch.Tensor, return_dict: bool = True) -> AutoencoderKLOutput:
|
341
394
|
r"""Encode a batch of images using a tiled encoder.
|
342
395
|
|
@@ -356,6 +409,13 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
|
|
356
409
|
If return_dict is True, a [`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain
|
357
410
|
`tuple` is returned.
|
358
411
|
"""
|
412
|
+
deprecation_message = (
|
413
|
+
"The tiled_encode implementation supporting the `return_dict` parameter is deprecated. In the future, the "
|
414
|
+
"implementation of this method will be replaced with that of `_tiled_encode` and you will no longer be able "
|
415
|
+
"to pass `return_dict`. You will also have to create a `DiagonalGaussianDistribution()` from the returned value."
|
416
|
+
)
|
417
|
+
deprecate("tiled_encode", "1.0.0", deprecation_message, standard_warn=False)
|
418
|
+
|
359
419
|
overlap_size = int(self.tile_sample_min_size * (1 - self.tile_overlap_factor))
|
360
420
|
blend_extent = int(self.tile_latent_min_size * self.tile_overlap_factor)
|
361
421
|
row_limit = self.tile_latent_min_size - blend_extent
|