diffusers 0.34.0__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
- diffusers/__init__.py +98 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +2 -0
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_table.py +3 -3
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +7 -6
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +292 -286
- diffusers/hooks/hooks.py +56 -1
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +2 -7
- diffusers/hooks/pyramid_attention_broadcast.py +14 -11
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +255 -4
- diffusers/loaders/lora_base.py +63 -30
- diffusers/loaders/lora_conversion_utils.py +434 -53
- diffusers/loaders/lora_pipeline.py +834 -37
- diffusers/loaders/peft.py +28 -5
- diffusers/loaders/single_file_model.py +44 -11
- diffusers/loaders/single_file_utils.py +170 -2
- diffusers/loaders/transformer_flux.py +9 -10
- diffusers/loaders/transformer_sd3.py +6 -1
- diffusers/loaders/unet.py +22 -5
- diffusers/loaders/unet_loader_utils.py +5 -2
- diffusers/models/__init__.py +8 -0
- diffusers/models/attention.py +484 -3
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_processor.py +105 -663
- diffusers/models/auto_model.py +2 -2
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_dc.py +14 -1
- diffusers/models/autoencoders/autoencoder_kl.py +1 -1
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
- diffusers/models/cache_utils.py +31 -9
- diffusers/models/controlnets/controlnet_flux.py +5 -5
- diffusers/models/controlnets/controlnet_union.py +4 -4
- diffusers/models/embeddings.py +26 -34
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +159 -94
- diffusers/models/transformers/__init__.py +2 -0
- diffusers/models/transformers/transformer_chroma.py +16 -117
- diffusers/models/transformers/transformer_cogview4.py +36 -2
- diffusers/models/transformers/transformer_cosmos.py +11 -4
- diffusers/models/transformers/transformer_flux.py +372 -132
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
- diffusers/models/transformers/transformer_ltx.py +104 -23
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_wan.py +298 -85
- diffusers/models/transformers/transformer_wan_vace.py +15 -21
- diffusers/models/unets/unet_2d_condition.py +2 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +31 -0
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
- diffusers/pipelines/auto_pipeline.py +17 -13
- diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
- diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +3 -1
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/pipeline_flux.py +34 -26
- diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
- diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
- diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
- diffusers/pipelines/pipeline_flax_utils.py +2 -2
- diffusers/pipelines/pipeline_loading_utils.py +24 -2
- diffusers/pipelines/pipeline_utils.py +22 -15
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
- diffusers/pipelines/wan/pipeline_wan.py +78 -20
- diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
- diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
- diffusers/quantizers/__init__.py +1 -177
- diffusers/quantizers/base.py +11 -0
- diffusers/quantizers/gguf/utils.py +92 -3
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
- diffusers/schedulers/scheduling_deis_multistep.py +8 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
- diffusers/schedulers/scheduling_scm.py +0 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
- diffusers/schedulers/scheduling_utils.py +2 -2
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/training_utils.py +78 -0
- diffusers/utils/__init__.py +10 -0
- diffusers/utils/constants.py +4 -0
- diffusers/utils/dummy_pt_objects.py +312 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
- diffusers/utils/dynamic_modules_utils.py +84 -25
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +70 -0
- diffusers/utils/peft_utils.py +11 -8
- diffusers/utils/testing_utils.py +136 -10
- diffusers/utils/torch_utils.py +18 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/METADATA +6 -6
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/RECORD +191 -127
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/WHEEL +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
diffusers/modular_pipelines/wan/modular_pipeline.py
ADDED
@@ -0,0 +1,90 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from ...loaders import WanLoraLoaderMixin
+from ...pipelines.pipeline_utils import StableDiffusionMixin
+from ...utils import logging
+from ..modular_pipeline import ModularPipeline
+
+
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+class WanModularPipeline(
+    ModularPipeline,
+    StableDiffusionMixin,
+    WanLoraLoaderMixin,
+):
+    """
+    A ModularPipeline for Wan.
+
+    <Tip warning={true}>
+
+    This is an experimental feature and is likely to change in the future.
+
+    </Tip>
+    """
+
+    @property
+    def default_height(self):
+        return self.default_sample_height * self.vae_scale_factor_spatial
+
+    @property
+    def default_width(self):
+        return self.default_sample_width * self.vae_scale_factor_spatial
+
+    @property
+    def default_num_frames(self):
+        return (self.default_sample_num_frames - 1) * self.vae_scale_factor_temporal + 1
+
+    @property
+    def default_sample_height(self):
+        return 60
+
+    @property
+    def default_sample_width(self):
+        return 104
+
+    @property
+    def default_sample_num_frames(self):
+        return 21
+
+    @property
+    def vae_scale_factor_spatial(self):
+        vae_scale_factor = 8
+        if hasattr(self, "vae") and self.vae is not None:
+            vae_scale_factor = 2 ** len(self.vae.temperal_downsample)
+        return vae_scale_factor
+
+    @property
+    def vae_scale_factor_temporal(self):
+        vae_scale_factor = 4
+        if hasattr(self, "vae") and self.vae is not None:
+            vae_scale_factor = 2 ** sum(self.vae.temperal_downsample)
+        return vae_scale_factor
+
+    @property
+    def num_channels_transformer(self):
+        num_channels_transformer = 16
+        if hasattr(self, "transformer") and self.transformer is not None:
+            num_channels_transformer = self.transformer.config.in_channels
+        return num_channels_transformer
+
+    @property
+    def num_channels_latents(self):
+        num_channels_latents = 16
+        if hasattr(self, "vae") and self.vae is not None:
+            num_channels_latents = self.vae.config.z_dim
+        return num_channels_latents
diffusers/pipelines/__init__.py
CHANGED
@@ -140,6 +140,8 @@ else:
         "FluxFillPipeline",
         "FluxPriorReduxPipeline",
         "ReduxImageEncoder",
+        "FluxKontextPipeline",
+        "FluxKontextInpaintPipeline",
     ]
     _import_structure["audioldm"] = ["AudioLDMPipeline"]
     _import_structure["audioldm2"] = [
@@ -378,6 +380,19 @@ else:
         "WuerstchenPriorPipeline",
     ]
     _import_structure["wan"] = ["WanPipeline", "WanImageToVideoPipeline", "WanVideoToVideoPipeline", "WanVACEPipeline"]
+    _import_structure["skyreels_v2"] = [
+        "SkyReelsV2DiffusionForcingPipeline",
+        "SkyReelsV2DiffusionForcingImageToVideoPipeline",
+        "SkyReelsV2DiffusionForcingVideoToVideoPipeline",
+        "SkyReelsV2ImageToVideoPipeline",
+        "SkyReelsV2Pipeline",
+    ]
+    _import_structure["qwenimage"] = [
+        "QwenImagePipeline",
+        "QwenImageImg2ImgPipeline",
+        "QwenImageInpaintPipeline",
+        "QwenImageEditPipeline",
+    ]
 try:
     if not is_onnx_available():
         raise OptionalDependencyNotAvailable()
@@ -609,6 +624,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             FluxFillPipeline,
             FluxImg2ImgPipeline,
             FluxInpaintPipeline,
+            FluxKontextInpaintPipeline,
+            FluxKontextPipeline,
             FluxPipeline,
             FluxPriorReduxPipeline,
             ReduxImageEncoder,
@@ -692,6 +709,12 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .paint_by_example import PaintByExamplePipeline
         from .pia import PIAPipeline
        from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
+        from .qwenimage import (
+            QwenImageEditPipeline,
+            QwenImageImg2ImgPipeline,
+            QwenImageInpaintPipeline,
+            QwenImagePipeline,
+        )
         from .sana import SanaControlNetPipeline, SanaPipeline, SanaSprintImg2ImgPipeline, SanaSprintPipeline
         from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
         from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
@@ -847,6 +870,14 @@
             SpectrogramDiffusionPipeline,
         )

+        from .skyreels_v2 import (
+            SkyReelsV2DiffusionForcingImageToVideoPipeline,
+            SkyReelsV2DiffusionForcingPipeline,
+            SkyReelsV2DiffusionForcingVideoToVideoPipeline,
+            SkyReelsV2ImageToVideoPipeline,
+            SkyReelsV2Pipeline,
+        )
+
     else:
         import sys
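With these registrations, the SkyReels V2 and QwenImage pipelines resolve through diffusers' lazy top-level imports. A minimal usage sketch; the checkpoint id is an assumption, substitute whichever QwenImage weights you use:

import torch
from diffusers import QwenImagePipeline

# "Qwen/Qwen-Image" is an assumed checkpoint id for illustration.
pipe = QwenImagePipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16)
pipe.to("cuda")
image = pipe(prompt="a watercolor fox in a snowy forest").images[0]
image.save("fox.png")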
diffusers/pipelines/audioldm2/pipeline_audioldm2.py
CHANGED
@@ -312,15 +312,14 @@ class AudioLDM2Pipeline(DiffusionPipeline):
                 The sequence of generated hidden-states.
         """
         cache_position_kwargs = {}
-        if is_transformers_version("<", "4.52.
+        if is_transformers_version("<", "4.52.1"):
             cache_position_kwargs["input_ids"] = inputs_embeds
-            cache_position_kwargs["model_kwargs"] = model_kwargs
         else:
             cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
             cache_position_kwargs["device"] = (
                 self.language_model.device if getattr(self, "language_model", None) is not None else self.device
             )
-
+        cache_position_kwargs["model_kwargs"] = model_kwargs
         max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
         model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
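The fix hoists `model_kwargs` out of the version branch so it is passed to `_get_initial_cache_position` on newer transformers releases too. A generic sketch of this version-gated kwargs pattern, with made-up names, not the pipeline's actual code:

from packaging import version
import transformers

def build_cache_position_kwargs(inputs_embeds, model_kwargs, device):
    # Hypothetical helper illustrating the pattern in the hunk above.
    kwargs = {}
    if version.parse(transformers.__version__) < version.parse("4.52.1"):
        kwargs["input_ids"] = inputs_embeds  # legacy signature
    else:
        kwargs["seq_length"] = inputs_embeds.shape[0]  # newer signature
        kwargs["device"] = device
    kwargs["model_kwargs"] = model_kwargs  # required on both branches
    return kwargs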
diffusers/pipelines/auto_pipeline.py
CHANGED
@@ -49,6 +49,7 @@ from .flux import (
     FluxControlPipeline,
     FluxImg2ImgPipeline,
     FluxInpaintPipeline,
+    FluxKontextPipeline,
     FluxPipeline,
 )
 from .hunyuandit import HunyuanDiTPipeline
@@ -142,6 +143,7 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("flux", FluxPipeline),
         ("flux-control", FluxControlPipeline),
         ("flux-controlnet", FluxControlNetPipeline),
+        ("flux-kontext", FluxKontextPipeline),
         ("lumina", LuminaPipeline),
         ("lumina2", Lumina2Pipeline),
         ("chroma", ChromaPipeline),
@@ -171,6 +173,7 @@ AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("flux", FluxImg2ImgPipeline),
         ("flux-controlnet", FluxControlNetImg2ImgPipeline),
         ("flux-control", FluxControlImg2ImgPipeline),
+        ("flux-kontext", FluxKontextPipeline),
     ]
 )

@@ -248,14 +251,15 @@ def _get_connected_pipeline(pipeline_cls):
     return _get_task_class(AUTO_INPAINT_PIPELINES_MAPPING, pipeline_cls.__name__, throw_error_if_not_exist=False)


-def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True):
-    def get_model(pipeline_class_name):
-        for task_mapping in SUPPORTED_TASKS_MAPPINGS:
-            for model_name, pipeline in task_mapping.items():
-                if pipeline.__name__ == pipeline_class_name:
-                    return model_name
+def _get_model(pipeline_class_name):
+    for task_mapping in SUPPORTED_TASKS_MAPPINGS:
+        for model_name, pipeline in task_mapping.items():
+            if pipeline.__name__ == pipeline_class_name:
+                return model_name
+

-    model_name = get_model(pipeline_class_name)
+def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True):
+    model_name = _get_model(pipeline_class_name)

     if model_name is not None:
         task_class = mapping.get(model_name, None)
@@ -391,8 +395,8 @@ class AutoPipelineForText2Image(ConfigMixin):

         <Tip>

-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.

         </Tip>

@@ -686,8 +690,8 @@ class AutoPipelineForImage2Image(ConfigMixin):

         <Tip>

-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.

         </Tip>

@@ -996,8 +1000,8 @@ class AutoPipelineForInpainting(ConfigMixin):

         <Tip>

-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.

         </Tip>

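The new "flux-kontext" entries let both `AutoPipelineForText2Image` and `AutoPipelineForImage2Image` resolve a Kontext checkpoint to `FluxKontextPipeline`. A hedged usage sketch; the checkpoint id and image URL are placeholders:

import torch
from diffusers import AutoPipelineForImage2Image
from diffusers.utils import load_image

# Assumed checkpoint id; any model that resolves to the "flux-kontext" key
# is looked up through AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.
pipe = AutoPipelineForImage2Image.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
).to("cuda")

init_image = load_image("https://example.com/input.png")  # placeholder URL
image = pipe(prompt="make it nighttime", image=init_image).images[0]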
diffusers/pipelines/chroma/pipeline_chroma.py
CHANGED
@@ -663,11 +663,11 @@ class ChromaPipeline(
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 3.5):
-
-
-
-
-
+                Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+                Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
diffusers/pipelines/chroma/pipeline_chroma_img2img.py
CHANGED
@@ -725,11 +725,11 @@ class ChromaImg2ImgPipeline(
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 5.0):
-
-
-
-
-
+                Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+                Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             strength (`float`, *optional*, defaults to 0.9):
                 Conceptually, indicates how much to transform the reference image. Must be between 0 and 1. image will
                 be used as a starting point, adding more noise to it the larger the strength. The number of denoising
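The rewritten docstring distinguishes embedded (distilled) guidance from classic classifier-free guidance. A schematic sketch of the two, not Chroma's actual forward code:

import torch

def cfg_step(model, x, t, cond, uncond, scale):
    # Classic classifier-free guidance: two forward passes per step,
    # combined as uncond + scale * (cond - uncond).
    eps_cond = model(x, t, cond)
    eps_uncond = model(x, t, uncond)
    return eps_uncond + scale * (eps_cond - eps_uncond)

def embedded_guidance_step(model, x, t, cond, scale):
    # A guidance-distilled model takes the scale as a conditioning input
    # and approximates CFG with a single forward pass.
    return model(x, t, cond, guidance=torch.tensor([scale]))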
diffusers/pipelines/cogvideo/pipeline_cogvideox.py
CHANGED
@@ -718,14 +718,15 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 timestep = t.expand(latent_model_input.shape[0])

                 # predict noise model_output
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    image_rotary_emb=image_rotary_emb,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        image_rotary_emb=image_rotary_emb,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()

                 # perform guidance
diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
CHANGED
@@ -784,14 +784,15 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 timestep = t.expand(latent_model_input.shape[0])

                 # predict noise model_output
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    image_rotary_emb=image_rotary_emb,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        image_rotary_emb=image_rotary_emb,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()

                 # perform guidance
diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
CHANGED
@@ -831,15 +831,16 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 timestep = t.expand(latent_model_input.shape[0])

                 # predict noise model_output
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    ofs=ofs_emb,
-                    image_rotary_emb=image_rotary_emb,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        ofs=ofs_emb,
+                        image_rotary_emb=image_rotary_emb,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()

                 # perform guidance
diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
CHANGED
@@ -799,14 +799,15 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 timestep = t.expand(latent_model_input.shape[0])

                 # predict noise model_output
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    image_rotary_emb=image_rotary_emb,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        image_rotary_emb=image_rotary_emb,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()

                 # perform guidance
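`cache_context` tags everything computed inside the `with` block so cache-based accelerators (FasterCache, the new first-block cache) can keep per-stream state; CogVideoX batches the conditional and unconditional inputs together, hence the single "cond_uncond" tag. A toy sketch of the mechanism, not the actual `CacheMixin` implementation:

from contextlib import contextmanager

def expensive_block(x):
    return x * 2  # stand-in for a transformer block

class CacheTaggedModule:
    def __init__(self):
        self._ctx = None
        self._caches = {}  # one cache slot per context name

    @contextmanager
    def cache_context(self, name):
        # Tag all forward passes that run inside the `with` block.
        self._ctx = name
        try:
            yield
        finally:
            self._ctx = None

    def forward(self, x):
        # Tagging keeps streams apart: a "cond" pass can never be served
        # a stale activation cached during an "uncond" pass.
        slot = self._caches.setdefault(self._ctx, {})
        if "out" not in slot:
            slot["out"] = expensive_block(x)
        return slot["out"]

m = CacheTaggedModule()
with m.cache_context("cond"):
    y_cond = m.forward(3)
with m.cache_context("uncond"):
    y_uncond = m.forward(5)  # computed fresh, not read from the "cond" slot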
diffusers/pipelines/cogview4/pipeline_cogview4.py
CHANGED
@@ -619,22 +619,10 @@ class CogView4Pipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0])

-                noise_pred_cond = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    original_size=original_size,
-                    target_size=target_size,
-                    crop_coords=crops_coords_top_left,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
-
-                # perform guidance
-                if self.do_classifier_free_guidance:
-                    noise_pred_uncond = self.transformer(
+                with self.transformer.cache_context("cond"):
+                    noise_pred_cond = self.transformer(
                         hidden_states=latent_model_input,
-                        encoder_hidden_states=negative_prompt_embeds,
+                        encoder_hidden_states=prompt_embeds,
                         timestep=timestep,
                         original_size=original_size,
                         target_size=target_size,
@@ -643,6 +631,19 @@ class CogView4Pipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
                         return_dict=False,
                     )[0]

+                # perform guidance
+                if self.do_classifier_free_guidance:
+                    with self.transformer.cache_context("uncond"):
+                        noise_pred_uncond = self.transformer(
+                            hidden_states=latent_model_input,
+                            encoder_hidden_states=negative_prompt_embeds,
+                            timestep=timestep,
+                            original_size=original_size,
+                            target_size=target_size,
+                            crop_coords=crops_coords_top_left,
+                            attention_kwargs=attention_kwargs,
+                            return_dict=False,
+                        )[0]
                     noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
                 else:
                     noise_pred = noise_pred_cond
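Unlike CogVideoX, CogView4 runs the conditional and unconditional passes separately, so each gets its own tag ("cond", "uncond") before the usual CFG combination. The combination line above, with toy numbers:

# Worked example of the CFG combination in the hunk above.
noise_pred_uncond = 0.2
noise_pred_cond = 0.5
guidance_scale = 5.0
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond)
print(noise_pred)  # 0.2 + 5.0 * (0.5 - 0.2) = 1.7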
diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
CHANGED
@@ -29,7 +29,7 @@ from ...utils.torch_utils import randn_tensor
 from ..blip_diffusion.blip_image_processing import BlipImageProcessor
 from ..blip_diffusion.modeling_blip2 import Blip2QFormerModel
 from ..blip_diffusion.modeling_ctx_clip import ContextCLIPTextModel
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput


 if is_torch_xla_available():
@@ -88,7 +88,7 @@ EXAMPLE_DOC_STRING = """
 """


-class BlipDiffusionControlNetPipeline(DiffusionPipeline):
+class BlipDiffusionControlNetPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
     """
     Pipeline for Canny Edge based Controlled subject-driven generation using Blip Diffusion.

@@ -116,6 +116,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
             Position of the context token in the text encoder.
     """

+    _last_supported_version = "0.33.1"
     model_cpu_offload_seq = "qformer->text_encoder->unet->vae"

     def __init__(
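`_last_supported_version` presumably records the final release in which this deprecated pipeline was fully maintained. A hypothetical sketch of how such a mixin could surface that; the real `DeprecatedPipelineMixin` in `pipeline_utils.py` may behave differently:

import warnings

class DeprecatedPipelineMixinSketch:
    # Hypothetical re-creation for illustration only, not diffusers' code.
    _last_supported_version = None

    def __init__(self, *args, **kwargs):
        version = self._last_supported_version or "an earlier release"
        warnings.warn(
            f"{type(self).__name__} is deprecated; it was last fully supported "
            f"in diffusers {version} and may be removed in a future release.",
            FutureWarning,
        )
        super().__init__(*args, **kwargs)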
|