diffusers 0.34.0__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff shows the content changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (191)
  1. diffusers/__init__.py +98 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/custom_blocks.py +134 -0
  4. diffusers/commands/diffusers_cli.py +2 -0
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/configuration_utils.py +11 -2
  7. diffusers/dependency_versions_table.py +3 -3
  8. diffusers/guiders/__init__.py +41 -0
  9. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  10. diffusers/guiders/auto_guidance.py +190 -0
  11. diffusers/guiders/classifier_free_guidance.py +141 -0
  12. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  13. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  14. diffusers/guiders/guider_utils.py +309 -0
  15. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  16. diffusers/guiders/skip_layer_guidance.py +262 -0
  17. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  18. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  19. diffusers/hooks/__init__.py +17 -0
  20. diffusers/hooks/_common.py +56 -0
  21. diffusers/hooks/_helpers.py +293 -0
  22. diffusers/hooks/faster_cache.py +7 -6
  23. diffusers/hooks/first_block_cache.py +259 -0
  24. diffusers/hooks/group_offloading.py +292 -286
  25. diffusers/hooks/hooks.py +56 -1
  26. diffusers/hooks/layer_skip.py +263 -0
  27. diffusers/hooks/layerwise_casting.py +2 -7
  28. diffusers/hooks/pyramid_attention_broadcast.py +14 -11
  29. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  30. diffusers/hooks/utils.py +43 -0
  31. diffusers/loaders/__init__.py +6 -0
  32. diffusers/loaders/ip_adapter.py +255 -4
  33. diffusers/loaders/lora_base.py +63 -30
  34. diffusers/loaders/lora_conversion_utils.py +434 -53
  35. diffusers/loaders/lora_pipeline.py +834 -37
  36. diffusers/loaders/peft.py +28 -5
  37. diffusers/loaders/single_file_model.py +44 -11
  38. diffusers/loaders/single_file_utils.py +170 -2
  39. diffusers/loaders/transformer_flux.py +9 -10
  40. diffusers/loaders/transformer_sd3.py +6 -1
  41. diffusers/loaders/unet.py +22 -5
  42. diffusers/loaders/unet_loader_utils.py +5 -2
  43. diffusers/models/__init__.py +8 -0
  44. diffusers/models/attention.py +484 -3
  45. diffusers/models/attention_dispatch.py +1218 -0
  46. diffusers/models/attention_processor.py +105 -663
  47. diffusers/models/auto_model.py +2 -2
  48. diffusers/models/autoencoders/__init__.py +1 -0
  49. diffusers/models/autoencoders/autoencoder_dc.py +14 -1
  50. diffusers/models/autoencoders/autoencoder_kl.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
  52. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  53. diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
  54. diffusers/models/cache_utils.py +31 -9
  55. diffusers/models/controlnets/controlnet_flux.py +5 -5
  56. diffusers/models/controlnets/controlnet_union.py +4 -4
  57. diffusers/models/embeddings.py +26 -34
  58. diffusers/models/model_loading_utils.py +233 -1
  59. diffusers/models/modeling_flax_utils.py +1 -2
  60. diffusers/models/modeling_utils.py +159 -94
  61. diffusers/models/transformers/__init__.py +2 -0
  62. diffusers/models/transformers/transformer_chroma.py +16 -117
  63. diffusers/models/transformers/transformer_cogview4.py +36 -2
  64. diffusers/models/transformers/transformer_cosmos.py +11 -4
  65. diffusers/models/transformers/transformer_flux.py +372 -132
  66. diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
  67. diffusers/models/transformers/transformer_ltx.py +104 -23
  68. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  69. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  70. diffusers/models/transformers/transformer_wan.py +298 -85
  71. diffusers/models/transformers/transformer_wan_vace.py +15 -21
  72. diffusers/models/unets/unet_2d_condition.py +2 -1
  73. diffusers/modular_pipelines/__init__.py +83 -0
  74. diffusers/modular_pipelines/components_manager.py +1068 -0
  75. diffusers/modular_pipelines/flux/__init__.py +66 -0
  76. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  77. diffusers/modular_pipelines/flux/decoders.py +109 -0
  78. diffusers/modular_pipelines/flux/denoise.py +227 -0
  79. diffusers/modular_pipelines/flux/encoders.py +412 -0
  80. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  81. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  82. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  83. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  84. diffusers/modular_pipelines/node_utils.py +665 -0
  85. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  86. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  87. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  88. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  89. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  90. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  91. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  92. diffusers/modular_pipelines/wan/__init__.py +66 -0
  93. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  94. diffusers/modular_pipelines/wan/decoders.py +105 -0
  95. diffusers/modular_pipelines/wan/denoise.py +261 -0
  96. diffusers/modular_pipelines/wan/encoders.py +242 -0
  97. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  98. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  99. diffusers/pipelines/__init__.py +31 -0
  100. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
  101. diffusers/pipelines/auto_pipeline.py +17 -13
  102. diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
  103. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
  104. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
  105. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
  106. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
  107. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
  108. diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
  109. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
  110. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
  111. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
  112. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
  113. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
  114. diffusers/pipelines/dit/pipeline_dit.py +3 -1
  115. diffusers/pipelines/flux/__init__.py +4 -0
  116. diffusers/pipelines/flux/pipeline_flux.py +34 -26
  117. diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
  118. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
  119. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
  120. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
  121. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
  122. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
  123. diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
  124. diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
  125. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
  126. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  127. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  128. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  129. diffusers/pipelines/flux/pipeline_output.py +6 -4
  130. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
  131. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
  132. diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
  133. diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
  134. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
  135. diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
  136. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  137. diffusers/pipelines/pipeline_loading_utils.py +24 -2
  138. diffusers/pipelines/pipeline_utils.py +22 -15
  139. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
  140. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
  141. diffusers/pipelines/qwenimage/__init__.py +55 -0
  142. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  143. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  144. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
  145. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  146. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  147. diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
  148. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  149. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  150. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  151. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  152. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  153. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  154. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  155. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
  156. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
  157. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
  158. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
  159. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
  160. diffusers/pipelines/wan/pipeline_wan.py +78 -20
  161. diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
  162. diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
  163. diffusers/quantizers/__init__.py +1 -177
  164. diffusers/quantizers/base.py +11 -0
  165. diffusers/quantizers/gguf/utils.py +92 -3
  166. diffusers/quantizers/pipe_quant_config.py +202 -0
  167. diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
  168. diffusers/schedulers/scheduling_deis_multistep.py +8 -1
  169. diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
  170. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
  171. diffusers/schedulers/scheduling_scm.py +0 -1
  172. diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
  173. diffusers/schedulers/scheduling_utils.py +2 -2
  174. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  175. diffusers/training_utils.py +78 -0
  176. diffusers/utils/__init__.py +10 -0
  177. diffusers/utils/constants.py +4 -0
  178. diffusers/utils/dummy_pt_objects.py +312 -0
  179. diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
  180. diffusers/utils/dynamic_modules_utils.py +84 -25
  181. diffusers/utils/hub_utils.py +33 -17
  182. diffusers/utils/import_utils.py +70 -0
  183. diffusers/utils/peft_utils.py +11 -8
  184. diffusers/utils/testing_utils.py +136 -10
  185. diffusers/utils/torch_utils.py +18 -0
  186. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/METADATA +6 -6
  187. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/RECORD +191 -127
  188. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
  189. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/WHEEL +0 -0
  190. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
  191. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
diffusers/hooks/hooks.py CHANGED
@@ -18,11 +18,44 @@ from typing import Any, Dict, Optional, Tuple
  import torch

  from ..utils.logging import get_logger
+ from ..utils.torch_utils import unwrap_module


  logger = get_logger(__name__) # pylint: disable=invalid-name


+ class BaseState:
+     def reset(self, *args, **kwargs) -> None:
+         raise NotImplementedError(
+             "BaseState::reset is not implemented. Please implement this method in the derived class."
+         )
+
+
+ class StateManager:
+     def __init__(self, state_cls: BaseState, init_args=None, init_kwargs=None):
+         self._state_cls = state_cls
+         self._init_args = init_args if init_args is not None else ()
+         self._init_kwargs = init_kwargs if init_kwargs is not None else {}
+         self._state_cache = {}
+         self._current_context = None
+
+     def get_state(self):
+         if self._current_context is None:
+             raise ValueError("No context is set. Please set a context before retrieving the state.")
+         if self._current_context not in self._state_cache.keys():
+             self._state_cache[self._current_context] = self._state_cls(*self._init_args, **self._init_kwargs)
+         return self._state_cache[self._current_context]
+
+     def set_context(self, name: str) -> None:
+         self._current_context = name
+
+     def reset(self, *args, **kwargs) -> None:
+         for name, state in list(self._state_cache.items()):
+             state.reset(*args, **kwargs)
+             self._state_cache.pop(name)
+         self._current_context = None
+
+
  class ModelHook:
      r"""
      A hook that contains callbacks to be executed just before and after the forward method of a model.
@@ -99,6 +132,14 @@ class ModelHook:
              raise NotImplementedError("This hook is stateful and needs to implement the `reset_state` method.")
          return module

+     def _set_context(self, module: torch.nn.Module, name: str) -> None:
+         # Iterate over all attributes of the hook to see if any of them have the type `StateManager`. If so, call `set_context` on them.
+         for attr_name in dir(self):
+             attr = getattr(self, attr_name)
+             if isinstance(attr, StateManager):
+                 attr.set_context(name)
+         return module
+

  class HookFunctionReference:
      def __init__(self) -> None:
@@ -211,9 +252,10 @@ class HookRegistry:
              hook.reset_state(self._module_ref)

          if recurse:
-             for module_name, module in self._module_ref.named_modules():
+             for module_name, module in unwrap_module(self._module_ref).named_modules():
                  if module_name == "":
                      continue
+                 module = unwrap_module(module)
                  if hasattr(module, "_diffusers_hook"):
                      module._diffusers_hook.reset_stateful_hooks(recurse=False)

@@ -223,6 +265,19 @@ class HookRegistry:
              module._diffusers_hook = cls(module)
          return module._diffusers_hook

+     def _set_context(self, name: Optional[str] = None) -> None:
+         for hook_name in reversed(self._hook_order):
+             hook = self.hooks[hook_name]
+             if hook._is_stateful:
+                 hook._set_context(self._module_ref, name)
+
+         for module_name, module in unwrap_module(self._module_ref).named_modules():
+             if module_name == "":
+                 continue
+             module = unwrap_module(module)
+             if hasattr(module, "_diffusers_hook"):
+                 module._diffusers_hook._set_context(name)
+
      def __repr__(self) -> str:
          registry_repr = ""
          for i, hook_name in enumerate(self._hook_order):
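
The new `BaseState`/`StateManager` pair gives stateful hooks a per-context state cache: `HookRegistry._set_context` propagates a context name to every `StateManager` attribute of a stateful hook, and `get_state` lazily creates one state object per context. Below is a minimal sketch of that behaviour using a hypothetical `CacheState` class; `StateManager` lives in the internal module `diffusers.hooks.hooks` and may change without notice.

```python
from diffusers.hooks.hooks import BaseState, StateManager


class CacheState(BaseState):
    """Hypothetical per-context state holding cached activations."""

    def __init__(self):
        self.hidden_states = None

    def reset(self):
        self.hidden_states = None


manager = StateManager(CacheState)

# Each named context (e.g. the conditional / unconditional branches of a guider)
# lazily gets its own CacheState instance.
manager.set_context("cond")
manager.get_state().hidden_states = "cond activations"

manager.set_context("uncond")
print(manager.get_state().hidden_states)  # None -- a fresh state for this context

# reset() calls reset() on every cached state, then clears the cache and context.
manager.reset()
```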
diffusers/hooks/layer_skip.py ADDED
@@ -0,0 +1,263 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import math
+ from dataclasses import asdict, dataclass
+ from typing import Callable, List, Optional
+
+ import torch
+
+ from ..utils import get_logger
+ from ..utils.torch_utils import unwrap_module
+ from ._common import (
+     _ALL_TRANSFORMER_BLOCK_IDENTIFIERS,
+     _ATTENTION_CLASSES,
+     _FEEDFORWARD_CLASSES,
+     _get_submodule_from_fqn,
+ )
+ from ._helpers import AttentionProcessorRegistry, TransformerBlockRegistry
+ from .hooks import HookRegistry, ModelHook
+
+
+ logger = get_logger(__name__) # pylint: disable=invalid-name
+
+ _LAYER_SKIP_HOOK = "layer_skip_hook"
+
+
+ # Aryan/YiYi TODO: we need to make guider class a config mixin so I think this is not needed
+ # either remove or make it serializable
+ @dataclass
+ class LayerSkipConfig:
+     r"""
+     Configuration for skipping internal transformer blocks when executing a transformer model.
+
+     Args:
+         indices (`List[int]`):
+             The indices of the transformer blocks to skip.
+         fqn (`str`, defaults to `"auto"`):
+             The fully qualified name identifying the stack of transformer blocks. Typically, this is
+             `transformer_blocks`, `single_transformer_blocks`, `blocks`, `layers`, or `temporal_transformer_blocks`.
+             For automatic detection, set this to `"auto"`. "auto" only works on DiT models. For UNet models, you must
+             provide the correct fqn.
+         skip_attention (`bool`, defaults to `True`):
+             Whether to skip attention blocks.
+         skip_ff (`bool`, defaults to `True`):
+             Whether to skip feed-forward blocks.
+         skip_attention_scores (`bool`, defaults to `False`):
+             Whether to skip attention score computation in the attention blocks. This is equivalent to using `value`
+             projections as the output of scaled dot product attention.
+         dropout (`float`, defaults to `1.0`):
+             The dropout probability for dropping the outputs of the skipped layers. By default, this is set to `1.0`,
+             meaning that the outputs of the skipped layers are completely ignored. If set to `0.0`, the outputs of the
+             skipped layers are fully retained, which is equivalent to not skipping any layers.
+     """
+
+     indices: List[int]
+     fqn: str = "auto"
+     skip_attention: bool = True
+     skip_attention_scores: bool = False
+     skip_ff: bool = True
+     dropout: float = 1.0
+
+     def __post_init__(self):
+         if not (0 <= self.dropout <= 1):
+             raise ValueError(f"Expected `dropout` to be between 0.0 and 1.0, but got {self.dropout}.")
+         if not math.isclose(self.dropout, 1.0) and self.skip_attention_scores:
+             raise ValueError(
+                 "Cannot set `skip_attention_scores` to True when `dropout` is not 1.0. Please set `dropout` to 1.0."
+             )
+
+     def to_dict(self):
+         return asdict(self)
+
+     @staticmethod
+     def from_dict(data: dict) -> "LayerSkipConfig":
+         return LayerSkipConfig(**data)
+
+
+ class AttentionScoreSkipFunctionMode(torch.overrides.TorchFunctionMode):
+     def __torch_function__(self, func, types, args=(), kwargs=None):
+         if kwargs is None:
+             kwargs = {}
+         if func is torch.nn.functional.scaled_dot_product_attention:
+             query = kwargs.get("query", None)
+             key = kwargs.get("key", None)
+             value = kwargs.get("value", None)
+             query = query if query is not None else args[0]
+             key = key if key is not None else args[1]
+             value = value if value is not None else args[2]
+             # If the Q sequence length does not match KV sequence length, methods like
+             # Perturbed Attention Guidance cannot be used (because the caller expects
+             # the same sequence length as Q, but if we return V here, it will not match).
+             # When Q.shape[2] != V.shape[2], PAG will essentially not be applied and
+             # the overall effect would be that of normal CFG with a scale of (guidance_scale + perturbed_guidance_scale).
+             if query.shape[2] == value.shape[2]:
+                 return value
+         return func(*args, **kwargs)
+
+
+ class AttentionProcessorSkipHook(ModelHook):
+     def __init__(self, skip_processor_output_fn: Callable, skip_attention_scores: bool = False, dropout: float = 1.0):
+         self.skip_processor_output_fn = skip_processor_output_fn
+         self.skip_attention_scores = skip_attention_scores
+         self.dropout = dropout
+
+     def new_forward(self, module: torch.nn.Module, *args, **kwargs):
+         if self.skip_attention_scores:
+             if not math.isclose(self.dropout, 1.0):
+                 raise ValueError(
+                     "Cannot set `skip_attention_scores` to True when `dropout` is not 1.0. Please set `dropout` to 1.0."
+                 )
+             with AttentionScoreSkipFunctionMode():
+                 output = self.fn_ref.original_forward(*args, **kwargs)
+         else:
+             if math.isclose(self.dropout, 1.0):
+                 output = self.skip_processor_output_fn(module, *args, **kwargs)
+             else:
+                 output = self.fn_ref.original_forward(*args, **kwargs)
+                 output = torch.nn.functional.dropout(output, p=self.dropout)
+         return output
+
+
+ class FeedForwardSkipHook(ModelHook):
+     def __init__(self, dropout: float):
+         super().__init__()
+         self.dropout = dropout
+
+     def new_forward(self, module: torch.nn.Module, *args, **kwargs):
+         if math.isclose(self.dropout, 1.0):
+             output = kwargs.get("hidden_states", None)
+             if output is None:
+                 output = kwargs.get("x", None)
+             if output is None and len(args) > 0:
+                 output = args[0]
+         else:
+             output = self.fn_ref.original_forward(*args, **kwargs)
+             output = torch.nn.functional.dropout(output, p=self.dropout)
+         return output
+
+
+ class TransformerBlockSkipHook(ModelHook):
+     def __init__(self, dropout: float):
+         super().__init__()
+         self.dropout = dropout
+
+     def initialize_hook(self, module):
+         self._metadata = TransformerBlockRegistry.get(unwrap_module(module).__class__)
+         return module
+
+     def new_forward(self, module: torch.nn.Module, *args, **kwargs):
+         if math.isclose(self.dropout, 1.0):
+             original_hidden_states = self._metadata._get_parameter_from_args_kwargs("hidden_states", args, kwargs)
+             if self._metadata.return_encoder_hidden_states_index is None:
+                 output = original_hidden_states
+             else:
+                 original_encoder_hidden_states = self._metadata._get_parameter_from_args_kwargs(
+                     "encoder_hidden_states", args, kwargs
+                 )
+                 output = (original_hidden_states, original_encoder_hidden_states)
+         else:
+             output = self.fn_ref.original_forward(*args, **kwargs)
+             output = torch.nn.functional.dropout(output, p=self.dropout)
+         return output
+
+
+ def apply_layer_skip(module: torch.nn.Module, config: LayerSkipConfig) -> None:
+     r"""
+     Apply layer skipping to internal layers of a transformer.
+
+     Args:
+         module (`torch.nn.Module`):
+             The transformer model to which the layer skip hook should be applied.
+         config (`LayerSkipConfig`):
+             The configuration for the layer skip hook.
+
+     Example:
+
+     ```python
+     >>> import torch
+     >>> from diffusers import CogVideoXTransformer3DModel, LayerSkipConfig, apply_layer_skip
+
+     >>> transformer = CogVideoXTransformer3DModel.from_pretrained(
+     ...     "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16
+     ... )
+     >>> config = LayerSkipConfig(indices=[10, 20], fqn="transformer_blocks")
+     >>> apply_layer_skip(transformer, config)
+     ```
+     """
+     _apply_layer_skip_hook(module, config)
+
+
+ def _apply_layer_skip_hook(module: torch.nn.Module, config: LayerSkipConfig, name: Optional[str] = None) -> None:
+     name = name or _LAYER_SKIP_HOOK
+
+     if config.skip_attention and config.skip_attention_scores:
+         raise ValueError("Cannot set both `skip_attention` and `skip_attention_scores` to True. Please choose one.")
+     if not math.isclose(config.dropout, 1.0) and config.skip_attention_scores:
+         raise ValueError(
+             "Cannot set `skip_attention_scores` to True when `dropout` is not 1.0. Please set `dropout` to 1.0."
+         )
+
+     if config.fqn == "auto":
+         for identifier in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS:
+             if hasattr(module, identifier):
+                 config.fqn = identifier
+                 break
+         else:
+             raise ValueError(
+                 "Could not find a suitable identifier for the transformer blocks automatically. Please provide a valid "
+                 "`fqn` (fully qualified name) that identifies a stack of transformer blocks."
+             )
+
+     transformer_blocks = _get_submodule_from_fqn(module, config.fqn)
+     if transformer_blocks is None or not isinstance(transformer_blocks, torch.nn.ModuleList):
+         raise ValueError(
+             f"Could not find {config.fqn} in the provided module, or configured `fqn` (fully qualified name) does not identify "
+             f"a `torch.nn.ModuleList`. Please provide a valid `fqn` that identifies a stack of transformer blocks."
+         )
+     if len(config.indices) == 0:
+         raise ValueError("Layer index list is empty. Please provide a non-empty list of layer indices to skip.")
+
+     blocks_found = False
+     for i, block in enumerate(transformer_blocks):
+         if i not in config.indices:
+             continue
+
+         blocks_found = True
+
+         if config.skip_attention and config.skip_ff:
+             logger.debug(f"Applying TransformerBlockSkipHook to '{config.fqn}.{i}'")
+             registry = HookRegistry.check_if_exists_or_initialize(block)
+             hook = TransformerBlockSkipHook(config.dropout)
+             registry.register_hook(hook, name)
+
+         elif config.skip_attention or config.skip_attention_scores:
+             for submodule_name, submodule in block.named_modules():
+                 if isinstance(submodule, _ATTENTION_CLASSES) and not submodule.is_cross_attention:
+                     logger.debug(f"Applying AttentionProcessorSkipHook to '{config.fqn}.{i}.{submodule_name}'")
+                     output_fn = AttentionProcessorRegistry.get(submodule.processor.__class__).skip_processor_output_fn
+                     registry = HookRegistry.check_if_exists_or_initialize(submodule)
+                     hook = AttentionProcessorSkipHook(output_fn, config.skip_attention_scores, config.dropout)
+                     registry.register_hook(hook, name)
+
+             if config.skip_ff:
+                 for submodule_name, submodule in block.named_modules():
+                     if isinstance(submodule, _FEEDFORWARD_CLASSES):
+                         logger.debug(f"Applying FeedForwardSkipHook to '{config.fqn}.{i}.{submodule_name}'")
+                         registry = HookRegistry.check_if_exists_or_initialize(submodule)
+                         hook = FeedForwardSkipHook(config.dropout)
+                         registry.register_hook(hook, name)
+
+     if not blocks_found:
+         raise ValueError(
+             f"Could not find any transformer blocks matching the provided indices {config.indices} and "
+             f"fully qualified name '{config.fqn}'. Please check the indices and fqn for correctness."
+         )
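
Beyond the docstring example above, the config exposes two distinct skipping modes. A minimal sketch, assuming `LayerSkipConfig` and `apply_layer_skip` are re-exported from `diffusers.hooks` (they are defined in `diffusers/hooks/layer_skip.py`):

```python
import torch

from diffusers import CogVideoXTransformer3DModel
from diffusers.hooks import LayerSkipConfig, apply_layer_skip

transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16
)

# Default mode: skip both attention and feed-forward, so the listed blocks pass
# their inputs through unchanged (dropout=1.0 discards the block outputs entirely).
apply_layer_skip(transformer, LayerSkipConfig(indices=[10, 20], fqn="transformer_blocks"))

# Alternative mode: keep the blocks, but make scaled dot product attention return
# the value projections directly (perturbed-attention style). `skip_attention`
# must be False when `skip_attention_scores` is True.
pag_like = LayerSkipConfig(
    indices=[10, 20],
    fqn="transformer_blocks",
    skip_attention=False,
    skip_attention_scores=True,
    skip_ff=False,
)
```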
diffusers/hooks/layerwise_casting.py CHANGED
@@ -18,6 +18,7 @@ from typing import Optional, Tuple, Type, Union
  import torch

  from ..utils import get_logger, is_peft_available, is_peft_version
+ from ._common import _GO_LC_SUPPORTED_PYTORCH_LAYERS
  from .hooks import HookRegistry, ModelHook


@@ -27,12 +28,6 @@ logger = get_logger(__name__) # pylint: disable=invalid-name
  # fmt: off
  _LAYERWISE_CASTING_HOOK = "layerwise_casting"
  _PEFT_AUTOCAST_DISABLE_HOOK = "peft_autocast_disable"
- SUPPORTED_PYTORCH_LAYERS = (
-     torch.nn.Conv1d, torch.nn.Conv2d, torch.nn.Conv3d,
-     torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d, torch.nn.ConvTranspose3d,
-     torch.nn.Linear,
- )
-
  DEFAULT_SKIP_MODULES_PATTERN = ("pos_embed", "patch_embed", "norm", "^proj_in$", "^proj_out$")
  # fmt: on

@@ -186,7 +181,7 @@ def _apply_layerwise_casting(
          logger.debug(f'Skipping layerwise casting for layer "{_prefix}"')
          return

-     if isinstance(module, SUPPORTED_PYTORCH_LAYERS):
+     if isinstance(module, _GO_LC_SUPPORTED_PYTORCH_LAYERS):
          logger.debug(f'Applying layerwise casting to layer "{_prefix}"')
          apply_layerwise_casting_hook(module, storage_dtype, compute_dtype, non_blocking)
          return
diffusers/hooks/pyramid_attention_broadcast.py CHANGED
@@ -18,8 +18,15 @@ from typing import Any, Callable, Optional, Tuple, Union

  import torch

+ from ..models.attention import AttentionModuleMixin
  from ..models.attention_processor import Attention, MochiAttention
  from ..utils import logging
+ from ._common import (
+     _ATTENTION_CLASSES,
+     _CROSS_TRANSFORMER_BLOCK_IDENTIFIERS,
+     _SPATIAL_TRANSFORMER_BLOCK_IDENTIFIERS,
+     _TEMPORAL_TRANSFORMER_BLOCK_IDENTIFIERS,
+ )
  from .hooks import HookRegistry, ModelHook


@@ -27,10 +34,6 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name


  _PYRAMID_ATTENTION_BROADCAST_HOOK = "pyramid_attention_broadcast"
- _ATTENTION_CLASSES = (Attention, MochiAttention)
- _SPATIAL_ATTENTION_BLOCK_IDENTIFIERS = ("blocks", "transformer_blocks", "single_transformer_blocks")
- _TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS = ("temporal_transformer_blocks",)
- _CROSS_ATTENTION_BLOCK_IDENTIFIERS = ("blocks", "transformer_blocks")


  @dataclass
@@ -60,11 +63,11 @@ class PyramidAttentionBroadcastConfig:
          cross_attention_timestep_skip_range (`Tuple[int, int]`, defaults to `(100, 800)`):
              The range of timesteps to skip in the cross-attention layer. The attention computations will be
              conditionally skipped if the current timestep is within the specified range.
-         spatial_attention_block_identifiers (`Tuple[str, ...]`, defaults to `("blocks", "transformer_blocks")`):
+         spatial_attention_block_identifiers (`Tuple[str, ...]`):
              The identifiers to match against the layer names to determine if the layer is a spatial attention layer.
-         temporal_attention_block_identifiers (`Tuple[str, ...]`, defaults to `("temporal_transformer_blocks",)`):
+         temporal_attention_block_identifiers (`Tuple[str, ...]`):
              The identifiers to match against the layer names to determine if the layer is a temporal attention layer.
-         cross_attention_block_identifiers (`Tuple[str, ...]`, defaults to `("blocks", "transformer_blocks")`):
+         cross_attention_block_identifiers (`Tuple[str, ...]`):
              The identifiers to match against the layer names to determine if the layer is a cross-attention layer.
      """

@@ -76,9 +79,9 @@ class PyramidAttentionBroadcastConfig:
      temporal_attention_timestep_skip_range: Tuple[int, int] = (100, 800)
      cross_attention_timestep_skip_range: Tuple[int, int] = (100, 800)

-     spatial_attention_block_identifiers: Tuple[str, ...] = _SPATIAL_ATTENTION_BLOCK_IDENTIFIERS
-     temporal_attention_block_identifiers: Tuple[str, ...] = _TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS
-     cross_attention_block_identifiers: Tuple[str, ...] = _CROSS_ATTENTION_BLOCK_IDENTIFIERS
+     spatial_attention_block_identifiers: Tuple[str, ...] = _SPATIAL_TRANSFORMER_BLOCK_IDENTIFIERS
+     temporal_attention_block_identifiers: Tuple[str, ...] = _TEMPORAL_TRANSFORMER_BLOCK_IDENTIFIERS
+     cross_attention_block_identifiers: Tuple[str, ...] = _CROSS_TRANSFORMER_BLOCK_IDENTIFIERS

      current_timestep_callback: Callable[[], int] = None

@@ -227,7 +230,7 @@ def apply_pyramid_attention_broadcast(module: torch.nn.Module, config: PyramidAt
          config.spatial_attention_block_skip_range = 2

      for name, submodule in module.named_modules():
-         if not isinstance(submodule, _ATTENTION_CLASSES):
+         if not isinstance(submodule, (*_ATTENTION_CLASSES, AttentionModuleMixin)):
              # PAB has been implemented specific to Diffusers' Attention classes. However, this does not mean that PAB
              # cannot be applied to this layer. For custom layers, users can extend this functionality and implement
              # their own PAB logic similar to `_apply_pyramid_attention_broadcast_on_attention_class`.
diffusers/hooks/smoothed_energy_guidance_utils.py ADDED
@@ -0,0 +1,167 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import math
+ from dataclasses import asdict, dataclass
+ from typing import List, Optional
+
+ import torch
+ import torch.nn.functional as F
+
+ from ..utils import get_logger
+ from ._common import _ALL_TRANSFORMER_BLOCK_IDENTIFIERS, _ATTENTION_CLASSES, _get_submodule_from_fqn
+ from .hooks import HookRegistry, ModelHook
+
+
+ logger = get_logger(__name__) # pylint: disable=invalid-name
+
+ _SMOOTHED_ENERGY_GUIDANCE_HOOK = "smoothed_energy_guidance_hook"
+
+
+ @dataclass
+ class SmoothedEnergyGuidanceConfig:
+     r"""
+     Configuration for applying Smoothed Energy Guidance to the self-attention layers of a transformer model.
+
+     Args:
+         indices (`List[int]`):
+             The indices of the transformer blocks in which smoothed energy guidance should be applied.
+         fqn (`str`, defaults to `"auto"`):
+             The fully qualified name identifying the stack of transformer blocks. Typically, this is
+             `transformer_blocks`, `single_transformer_blocks`, `blocks`, `layers`, or `temporal_transformer_blocks`.
+             For automatic detection, set this to `"auto"`. "auto" only works on DiT models. For UNet models, you must
+             provide the correct fqn.
+         _query_proj_identifiers (`List[str]`, defaults to `None`):
+             The identifiers for the query projection layers. Typically, these are `to_q`, `query`, or `q_proj`. If
+             `None`, `to_q` is used by default.
+     """
+
+     indices: List[int]
+     fqn: str = "auto"
+     _query_proj_identifiers: List[str] = None
+
+     def to_dict(self):
+         return asdict(self)
+
+     @staticmethod
+     def from_dict(data: dict) -> "SmoothedEnergyGuidanceConfig":
+         return SmoothedEnergyGuidanceConfig(**data)
+
+
+ class SmoothedEnergyGuidanceHook(ModelHook):
+     def __init__(self, blur_sigma: float = 1.0, blur_threshold_inf: float = 9999.9) -> None:
+         super().__init__()
+         self.blur_sigma = blur_sigma
+         self.blur_threshold_inf = blur_threshold_inf
+
+     def post_forward(self, module: torch.nn.Module, output: torch.Tensor) -> torch.Tensor:
+         # Copied from https://github.com/SusungHong/SEG-SDXL/blob/cf8256d640d5373541cfea3b3b6caf93272cf986/pipeline_seg.py#L172C31-L172C102
+         kernel_size = math.ceil(6 * self.blur_sigma) + 1 - math.ceil(6 * self.blur_sigma) % 2
+         smoothed_output = _gaussian_blur_2d(output, kernel_size, self.blur_sigma, self.blur_threshold_inf)
+         return smoothed_output
+
+
+ def _apply_smoothed_energy_guidance_hook(
+     module: torch.nn.Module, config: SmoothedEnergyGuidanceConfig, blur_sigma: float, name: Optional[str] = None
+ ) -> None:
+     name = name or _SMOOTHED_ENERGY_GUIDANCE_HOOK
+
+     if config.fqn == "auto":
+         for identifier in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS:
+             if hasattr(module, identifier):
+                 config.fqn = identifier
+                 break
+         else:
+             raise ValueError(
+                 "Could not find a suitable identifier for the transformer blocks automatically. Please provide a valid "
+                 "`fqn` (fully qualified name) that identifies a stack of transformer blocks."
+             )
+
+     if config._query_proj_identifiers is None:
+         config._query_proj_identifiers = ["to_q"]
+
+     transformer_blocks = _get_submodule_from_fqn(module, config.fqn)
+     blocks_found = False
+     for i, block in enumerate(transformer_blocks):
+         if i not in config.indices:
+             continue
+
+         blocks_found = True
+
+         for submodule_name, submodule in block.named_modules():
+             if not isinstance(submodule, _ATTENTION_CLASSES) or submodule.is_cross_attention:
+                 continue
+             for identifier in config._query_proj_identifiers:
+                 query_proj = getattr(submodule, identifier, None)
+                 if query_proj is None or not isinstance(query_proj, torch.nn.Linear):
+                     continue
+                 logger.debug(
+                     f"Registering smoothed energy guidance hook on {config.fqn}.{i}.{submodule_name}.{identifier}"
+                 )
+                 registry = HookRegistry.check_if_exists_or_initialize(query_proj)
+                 hook = SmoothedEnergyGuidanceHook(blur_sigma)
+                 registry.register_hook(hook, name)
+
+     if not blocks_found:
+         raise ValueError(
+             f"Could not find any transformer blocks matching the provided indices {config.indices} and "
+             f"fully qualified name '{config.fqn}'. Please check the indices and fqn for correctness."
+         )
+
+
+ # Modified from https://github.com/SusungHong/SEG-SDXL/blob/cf8256d640d5373541cfea3b3b6caf93272cf986/pipeline_seg.py#L71
+ def _gaussian_blur_2d(query: torch.Tensor, kernel_size: int, sigma: float, sigma_threshold_inf: float) -> torch.Tensor:
+     """
+     This implementation assumes that the input query is for visual (image/videos) tokens to apply the 2D gaussian blur.
+     However, some models use joint text-visual token attention for which this may not be suitable. Additionally, this
+     implementation also assumes that the visual tokens come from a square image/video. In practice, despite these
+     assumptions, applying the 2D square gaussian blur on the query projections generates reasonable results for
+     Smoothed Energy Guidance.
+
+     SEG is only supported as an experimental prototype feature for now, so the implementation may be modified in the
+     future without warning or guarantee of reproducibility.
+     """
+     assert query.ndim == 3
+
+     is_inf = sigma > sigma_threshold_inf
+     batch_size, seq_len, embed_dim = query.shape
+
+     seq_len_sqrt = int(math.sqrt(seq_len))
+     num_square_tokens = seq_len_sqrt * seq_len_sqrt
+     query_slice = query[:, :num_square_tokens, :]
+     query_slice = query_slice.permute(0, 2, 1)
+     query_slice = query_slice.reshape(batch_size, embed_dim, seq_len_sqrt, seq_len_sqrt)
+
+     if is_inf:
+         kernel_size = min(kernel_size, seq_len_sqrt - (seq_len_sqrt % 2 - 1))
+         kernel_size_half = (kernel_size - 1) / 2
+
+         x = torch.linspace(-kernel_size_half, kernel_size_half, steps=kernel_size)
+         pdf = torch.exp(-0.5 * (x / sigma).pow(2))
+         kernel1d = pdf / pdf.sum()
+         kernel1d = kernel1d.to(query)
+         kernel2d = torch.matmul(kernel1d[:, None], kernel1d[None, :])
+         kernel2d = kernel2d.expand(embed_dim, 1, kernel2d.shape[0], kernel2d.shape[1])
+
+         padding = [kernel_size // 2, kernel_size // 2, kernel_size // 2, kernel_size // 2]
+         query_slice = F.pad(query_slice, padding, mode="reflect")
+         query_slice = F.conv2d(query_slice, kernel2d, groups=embed_dim)
+     else:
+         query_slice[:] = query_slice.mean(dim=(-2, -1), keepdim=True)
+
+     query_slice = query_slice.reshape(batch_size, embed_dim, num_square_tokens)
+     query_slice = query_slice.permute(0, 2, 1)
+     query[:, :num_square_tokens, :] = query_slice.clone()
+
+     return query
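
A small sketch of the private `_gaussian_blur_2d` helper above, only to illustrate the expected shapes: the query sequence is treated as a square grid of visual tokens and smoothed spatially, and the output keeps the input shape. This is internal, experimental API and may change without notice.

```python
import math

import torch

from diffusers.hooks.smoothed_energy_guidance_utils import _gaussian_blur_2d

batch_size, height, width, dim = 2, 16, 16, 64
query = torch.randn(batch_size, height * width, dim)  # (batch, seq_len, embed_dim)

blur_sigma = 3.0
# Same kernel-size formula the hook uses in post_forward.
kernel_size = math.ceil(6 * blur_sigma) + 1 - math.ceil(6 * blur_sigma) % 2

blurred = _gaussian_blur_2d(query, kernel_size, blur_sigma, 9999.9)
print(blurred.shape)  # torch.Size([2, 256, 64]) -- same shape as the input query
```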
diffusers/hooks/utils.py ADDED
@@ -0,0 +1,43 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import torch
+
+ from ._common import _ALL_TRANSFORMER_BLOCK_IDENTIFIERS, _ATTENTION_CLASSES, _FEEDFORWARD_CLASSES
+
+
+ def _get_identifiable_transformer_blocks_in_module(module: torch.nn.Module):
+     module_list_with_transformer_blocks = []
+     for name, submodule in module.named_modules():
+         name_endswith_identifier = any(name.endswith(identifier) for identifier in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS)
+         is_modulelist = isinstance(submodule, torch.nn.ModuleList)
+         if name_endswith_identifier and is_modulelist:
+             module_list_with_transformer_blocks.append((name, submodule))
+     return module_list_with_transformer_blocks
+
+
+ def _get_identifiable_attention_layers_in_module(module: torch.nn.Module):
+     attention_layers = []
+     for name, submodule in module.named_modules():
+         if isinstance(submodule, _ATTENTION_CLASSES):
+             attention_layers.append((name, submodule))
+     return attention_layers
+
+
+ def _get_identifiable_feedforward_layers_in_module(module: torch.nn.Module):
+     feedforward_layers = []
+     for name, submodule in module.named_modules():
+         if isinstance(submodule, _FEEDFORWARD_CLASSES):
+             feedforward_layers.append((name, submodule))
+     return feedforward_layers
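
The discovery helpers above are simple name- and type-based walks over `named_modules()`: the block helper matches `ModuleList` attributes whose names end with a known identifier, while the attention and feed-forward helpers match against the internal `_ATTENTION_CLASSES` / `_FEEDFORWARD_CLASSES` tuples. A tiny sketch of the block helper on a toy module, assuming `"transformer_blocks"` is among `_ALL_TRANSFORMER_BLOCK_IDENTIFIERS` as the layer-skip docstring indicates:

```python
import torch

from diffusers.hooks.utils import _get_identifiable_transformer_blocks_in_module


class ToyTransformer(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # A ModuleList whose attribute name ends with a known identifier is picked up.
        self.transformer_blocks = torch.nn.ModuleList([torch.nn.Linear(8, 8) for _ in range(2)])
        self.proj_out = torch.nn.Linear(8, 8)


blocks = _get_identifiable_transformer_blocks_in_module(ToyTransformer())
print([name for name, _ in blocks])  # expected: ['transformer_blocks']
```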