cache-dit 0.2.31__py3-none-any.whl → 0.2.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cache-dit was flagged as potentially problematic by the registry.
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/block_adapters/__init__.py +1 -1
- cache_dit/cache_factory/block_adapters/block_adapters.py +1 -1
- cache_dit/cache_factory/cache_adapters.py +7 -1
- cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py +23 -62
- cache_dit/cache_factory/cache_blocks/pattern_base.py +34 -165
- cache_dit/cache_factory/cache_contexts/cache_context.py +7 -53
- cache_dit/cache_factory/cache_contexts/cache_manager.py +18 -66
- cache_dit/cache_factory/cache_contexts/taylorseer.py +0 -8
- cache_dit/cache_factory/cache_interface.py +2 -2
- cache_dit/quantize/quantize_ao.py +3 -0
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.33.dist-info}/METADATA +47 -33
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.33.dist-info}/RECORD +17 -18
- cache_dit/quantize/quantize_svdq.py +0 -0
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.33.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.33.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.33.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.33.dist-info}/top_level.txt +0 -0
cache_dit/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.2.31'
-__version_tuple__ = version_tuple = (0, 2, 31)
+__version__ = version = '0.2.33'
+__version_tuple__ = version_tuple = (0, 2, 33)

 __commit_id__ = commit_id = None
cache_dit/cache_factory/block_adapters/__init__.py
CHANGED
@@ -254,7 +254,7 @@ def skyreelsv2_adapter(pipe, **kwargs) -> BlockAdapter:
     )


-@BlockAdapterRegistry.register("
+@BlockAdapterRegistry.register("StableDiffusion3")
 def sd3_adapter(pipe, **kwargs) -> BlockAdapter:
     from diffusers import SD3Transformer2DModel

cache_dit/cache_factory/block_adapters/block_adapters.py
CHANGED
@@ -579,7 +579,7 @@ class BlockAdapter:
             assert isinstance(adapter[0], torch.nn.Module)
             return getattr(adapter[0], "_is_cached", False)
         else:
-
+            return getattr(adapter, "_is_cached", False)

     @classmethod
     def nested_depth(cls, obj: Any):
cache_dit/cache_factory/cache_adapters.py
CHANGED
@@ -114,7 +114,7 @@ class CachedAdapter:
         **cache_context_kwargs,
     ):
         # Check cache_context_kwargs
-        if
+        if cache_context_kwargs["enable_spearate_cfg"] is None:
             # Check cfg for some specific case if users don't set it as True
             if BlockAdapterRegistry.has_separate_cfg(block_adapter):
                 cache_context_kwargs["enable_spearate_cfg"] = True
@@ -131,6 +131,12 @@ class CachedAdapter:
                 f"register: {cache_context_kwargs['enable_spearate_cfg']}, "
                 f"Pipeline: {block_adapter.pipe.__class__.__name__}."
             )
+        else:
+            logger.info(
+                f"Use custom 'enable_spearate_cfg' from cache context "
+                f"kwargs: {cache_context_kwargs['enable_spearate_cfg']}. "
+                f"Pipeline: {block_adapter.pipe.__class__.__name__}."
+            )

         if (
             cache_type := cache_context_kwargs.pop("cache_type", None)
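With this change, `enable_spearate_cfg` is only auto-resolved when the caller leaves it as `None`; an explicit `True`/`False` is now honored and logged instead of being overridden. A minimal sketch of that resolution order, mirroring the hunk above; the helper name `resolve_separate_cfg` is hypothetical and only used for illustration:

```python
# Sketch only: mirrors the resolution logic added in CachedAdapter above.
def resolve_separate_cfg(user_value, block_adapter, registry) -> bool:
    if user_value is None:
        # Not set by the user: fall back to the per-pipeline registry default
        # (pipelines that run CFG as a separate forward register True).
        return registry.has_separate_cfg(block_adapter)
    # Explicitly set by the user: keep it as-is; the adapter only logs it.
    return user_value
```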
cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py
CHANGED
@@ -1,6 +1,5 @@
 import torch

-from typing import Dict, Any
 from cache_dit.cache_factory import ForwardPattern
 from cache_dit.cache_factory.cache_blocks.pattern_base import (
     CachedBlocks_Pattern_Base,
@@ -24,14 +23,12 @@ class CachedBlocks_Pattern_3_4_5(CachedBlocks_Pattern_Base):
         **kwargs,
     ):
         # Use it's own cache context.
-        self.cache_manager.set_context(
-
-        )
+        self.cache_manager.set_context(self.cache_context)
+        self._check_cache_params()

         original_hidden_states = hidden_states
         # Call first `n` blocks to process the hidden states for
         # more stable diff calculation.
-        # encoder_hidden_states: None Pattern 3, else 4, 5
         hidden_states, new_encoder_hidden_states = self.call_Fn_blocks(
             hidden_states,
             *args,
@@ -109,10 +106,7 @@ class CachedBlocks_Pattern_3_4_5(CachedBlocks_Pattern_Base):
             *args,
             **kwargs,
         )
-
-        new_encoder_hidden_states_residual = (
-            new_encoder_hidden_states - old_encoder_hidden_states
-        )
+
         torch._dynamo.graph_break()
         if self.cache_manager.is_cache_residual():
             self.cache_manager.set_Bn_buffer(
@@ -125,6 +119,10 @@ class CachedBlocks_Pattern_3_4_5(CachedBlocks_Pattern_Base):
                 prefix=f"{self.cache_prefix}_Bn_hidden_states",
             )

+        if new_encoder_hidden_states is not None:
+            new_encoder_hidden_states_residual = (
+                new_encoder_hidden_states - old_encoder_hidden_states
+            )
         if self.cache_manager.is_encoder_cache_residual():
             if new_encoder_hidden_states is not None:
                 self.cache_manager.set_Bn_encoder_buffer(
@@ -159,27 +157,12 @@ class CachedBlocks_Pattern_3_4_5(CachedBlocks_Pattern_Base):
             )
         )

-    @torch.compiler.disable
-    def maybe_update_kwargs(
-        self, encoder_hidden_states, kwargs: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        # if "encoder_hidden_states" in kwargs:
-        #     kwargs["encoder_hidden_states"] = encoder_hidden_states
-        # return kwargs
-        return kwargs
-
     def call_Fn_blocks(
         self,
         hidden_states: torch.Tensor,
         *args,
         **kwargs,
     ):
-        assert self.cache_manager.Fn_compute_blocks() <= len(
-            self.transformer_blocks
-        ), (
-            f"Fn_compute_blocks {self.cache_manager.Fn_compute_blocks()} must be less than "
-            f"the number of transformer blocks {len(self.transformer_blocks)}"
-        )
         new_encoder_hidden_states = None
         for block in self._Fn_blocks():
             hidden_states = block(
@@ -194,10 +177,6 @@ class CachedBlocks_Pattern_3_4_5(CachedBlocks_Pattern_Base):
                         new_encoder_hidden_states,
                         hidden_states,
                     )
-            kwargs = self.maybe_update_kwargs(
-                new_encoder_hidden_states,
-                kwargs,
-            )

         return hidden_states, new_encoder_hidden_states

@@ -222,11 +201,6 @@ class CachedBlocks_Pattern_3_4_5(CachedBlocks_Pattern_Base):
                         new_encoder_hidden_states,
                         hidden_states,
                     )
-            kwargs = self.maybe_update_kwargs(
-                new_encoder_hidden_states,
-                kwargs,
-            )
-
         # compute hidden_states residual
         hidden_states = hidden_states.contiguous()
         hidden_states_residual = hidden_states - original_hidden_states
@@ -243,35 +217,22 @@ class CachedBlocks_Pattern_3_4_5(CachedBlocks_Pattern_Base):
         *args,
         **kwargs,
     ):
-
-
-
-
-
-
-
-
-
-            f"patterns: {self._supported_patterns}."
+        new_encoder_hidden_states = None
+        if self.cache_manager.Bn_compute_blocks() == 0:
+            return hidden_states, new_encoder_hidden_states
+
+        for block in self._Bn_blocks():
+            hidden_states = block(
+                hidden_states,
+                *args,
+                **kwargs,
             )
-
-
-
-
-
-
-
-            )
-            if not isinstance(hidden_states, torch.Tensor):  # Pattern 4,5
-                hidden_states, new_encoder_hidden_states = hidden_states
-                if not self.forward_pattern.Return_H_First:
-                    hidden_states, new_encoder_hidden_states = (
-                        new_encoder_hidden_states,
-                        hidden_states,
-                    )
-            kwargs = self.maybe_update_kwargs(
-                new_encoder_hidden_states,
-                kwargs,
-            )
+            if not isinstance(hidden_states, torch.Tensor):  # Pattern 4,5
+                hidden_states, new_encoder_hidden_states = hidden_states
+                if not self.forward_pattern.Return_H_First:
+                    hidden_states, new_encoder_hidden_states = (
+                        new_encoder_hidden_states,
+                        hidden_states,
+                    )

         return hidden_states, new_encoder_hidden_states
cache_dit/cache_factory/cache_blocks/pattern_base.py
CHANGED
@@ -93,6 +93,21 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
                 required_param in forward_parameters
             ), f"The input parameters must contains: {required_param}."

+    @torch.compiler.disable
+    def _check_cache_params(self):
+        assert self.cache_manager.Fn_compute_blocks() <= len(
+            self.transformer_blocks
+        ), (
+            f"Fn_compute_blocks {self.cache_manager.Fn_compute_blocks()} must be less than "
+            f"the number of transformer blocks {len(self.transformer_blocks)}"
+        )
+        assert self.cache_manager.Bn_compute_blocks() <= len(
+            self.transformer_blocks
+        ), (
+            f"Bn_compute_blocks {self.cache_manager.Bn_compute_blocks()} must be less than "
+            f"the number of transformer blocks {len(self.transformer_blocks)}"
+        )
+
     def forward(
         self,
         hidden_states: torch.Tensor,
@@ -100,7 +115,9 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
         *args,
         **kwargs,
     ):
+        # Use it's own cache context.
         self.cache_manager.set_context(self.cache_context)
+        self._check_cache_params()

         original_hidden_states = hidden_states
         # Call first `n` blocks to process the hidden states for
@@ -191,18 +208,17 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
                     prefix=f"{self.cache_prefix}_Bn_residual",
                 )
             else:
-                # TaylorSeer
                 self.cache_manager.set_Bn_buffer(
                     hidden_states,
                     prefix=f"{self.cache_prefix}_Bn_hidden_states",
                 )
+
             if self.cache_manager.is_encoder_cache_residual():
                 self.cache_manager.set_Bn_encoder_buffer(
                     encoder_hidden_states_residual,
                     prefix=f"{self.cache_prefix}_Bn_residual",
                 )
             else:
-                # TaylorSeer
                 self.cache_manager.set_Bn_encoder_buffer(
                     encoder_hidden_states,
                     prefix=f"{self.cache_prefix}_Bn_hidden_states",
@@ -296,12 +312,6 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
         *args,
         **kwargs,
     ):
-        assert self.cache_manager.Fn_compute_blocks() <= len(
-            self.transformer_blocks
-        ), (
-            f"Fn_compute_blocks {self.cache_manager.Fn_compute_blocks()} must be less than "
-            f"the number of transformer blocks {len(self.transformer_blocks)}"
-        )
         for block in self._Fn_blocks():
             hidden_states = block(
                 hidden_states,
@@ -345,12 +355,19 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):

         # compute hidden_states residual
         hidden_states = hidden_states.contiguous()
-        encoder_hidden_states = encoder_hidden_states.contiguous()

         hidden_states_residual = hidden_states - original_hidden_states
-
-
-
+
+        if (
+            encoder_hidden_states is not None
+            and original_encoder_hidden_states is not None
+        ):
+            encoder_hidden_states = encoder_hidden_states.contiguous()
+            encoder_hidden_states_residual = (
+                encoder_hidden_states - original_encoder_hidden_states
+            )
+        else:
+            encoder_hidden_states_residual = None

         return (
             hidden_states,
@@ -359,28 +376,17 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
             encoder_hidden_states_residual,
         )

-    def
+    def call_Bn_blocks(
         self,
-        # Block index in the transformer blocks
-        # Bn: 8, block_id should be in [0, 8)
-        block_id: int,
-        # Below are the inputs to the block
-        block,  # The transformer block to be executed
         hidden_states: torch.Tensor,
         encoder_hidden_states: torch.Tensor,
         *args,
         **kwargs,
     ):
-
-
-
-
-        # and cache the residuals in non-cache steps.
-
-        # Normal steps: Compute the block and cache the residuals.
-        if not self._is_in_cache_step():
-            Bn_i_original_hidden_states = hidden_states
-            Bn_i_original_encoder_hidden_states = encoder_hidden_states
+        if self.cache_manager.Bn_compute_blocks() == 0:
+            return hidden_states, encoder_hidden_states
+
+        for block in self._Bn_blocks():
             hidden_states = block(
                 hidden_states,
                 encoder_hidden_states,
@@ -394,142 +400,5 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
                         encoder_hidden_states,
                         hidden_states,
                     )
-            # Cache residuals for the non-compute Bn blocks for
-            # subsequent cache steps.
-            if block_id not in self.cache_manager.Bn_compute_blocks_ids():
-                Bn_i_hidden_states_residual = (
-                    hidden_states - Bn_i_original_hidden_states
-                )
-                Bn_i_encoder_hidden_states_residual = (
-                    encoder_hidden_states - Bn_i_original_encoder_hidden_states
-                )
-
-                # Save original_hidden_states for diff calculation.
-                self.cache_manager.set_Bn_buffer(
-                    Bn_i_original_hidden_states,
-                    prefix=f"{self.cache_prefix}_Bn_{block_id}_original",
-                )
-                self.cache_manager.set_Bn_encoder_buffer(
-                    Bn_i_original_encoder_hidden_states,
-                    prefix=f"{self.cache_prefix}_Bn_{block_id}_original",
-                )
-
-                self.cache_manager.set_Bn_buffer(
-                    Bn_i_hidden_states_residual,
-                    prefix=f"{self.cache_prefix}_Bn_{block_id}_residual",
-                )
-                self.cache_manager.set_Bn_encoder_buffer(
-                    Bn_i_encoder_hidden_states_residual,
-                    prefix=f"{self.cache_prefix}_Bn_{block_id}_residual",
-                )
-                del Bn_i_hidden_states_residual
-                del Bn_i_encoder_hidden_states_residual
-
-            del Bn_i_original_hidden_states
-            del Bn_i_original_encoder_hidden_states
-
-        else:
-            # Cache steps: Reuse the cached residuals.
-            # Check if the block is in the Bn_compute_blocks_ids.
-            if block_id in self.cache_manager.Bn_compute_blocks_ids():
-                hidden_states = block(
-                    hidden_states,
-                    encoder_hidden_states,
-                    *args,
-                    **kwargs,
-                )
-                if not isinstance(hidden_states, torch.Tensor):
-                    hidden_states, encoder_hidden_states = hidden_states
-                    if not self.forward_pattern.Return_H_First:
-                        hidden_states, encoder_hidden_states = (
-                            encoder_hidden_states,
-                            hidden_states,
-                        )
-            else:
-                # Skip the block if it is not in the Bn_compute_blocks_ids.
-                # Use the cached residuals instead.
-                # Check if can use the cached residuals.
-                if self.cache_manager.can_cache(
-                    hidden_states,  # curr step
-                    parallelized=self._is_parallelized(),
-                    threshold=self.cache_manager.non_compute_blocks_diff_threshold(),
-                    prefix=f"{self.cache_prefix}_Bn_{block_id}_original",  # prev step
-                ):
-                    hidden_states, encoder_hidden_states = (
-                        self.cache_manager.apply_cache(
-                            hidden_states,
-                            encoder_hidden_states,
-                            prefix=(
-                                f"{self.cache_prefix}_Bn_{block_id}_residual"
-                                if self.cache_manager.is_cache_residual()
-                                else f"{self.cache_prefix}_Bn_{block_id}_original"
-                            ),
-                            encoder_prefix=(
-                                f"{self.cache_prefix}_Bn_{block_id}_residual"
-                                if self.cache_manager.is_encoder_cache_residual()
-                                else f"{self.cache_prefix}_Bn_{block_id}_original"
-                            ),
-                        )
-                    )
-                else:
-                    hidden_states = block(
-                        hidden_states,
-                        encoder_hidden_states,
-                        *args,
-                        **kwargs,
-                    )
-                    if not isinstance(hidden_states, torch.Tensor):
-                        hidden_states, encoder_hidden_states = hidden_states
-                        if not self.forward_pattern.Return_H_First:
-                            hidden_states, encoder_hidden_states = (
-                                encoder_hidden_states,
-                                hidden_states,
-                            )
-        return hidden_states, encoder_hidden_states
-
-    def call_Bn_blocks(
-        self,
-        hidden_states: torch.Tensor,
-        encoder_hidden_states: torch.Tensor,
-        *args,
-        **kwargs,
-    ):
-        if self.cache_manager.Bn_compute_blocks() == 0:
-            return hidden_states, encoder_hidden_states
-
-        assert self.cache_manager.Bn_compute_blocks() <= len(
-            self.transformer_blocks
-        ), (
-            f"Bn_compute_blocks {self.cache_manager.Bn_compute_blocks()} must be less than "
-            f"the number of transformer blocks {len(self.transformer_blocks)}"
-        )
-        if len(self.cache_manager.Bn_compute_blocks_ids()) > 0:
-            for i, block in enumerate(self._Bn_blocks()):
-                hidden_states, encoder_hidden_states = (
-                    self._compute_or_cache_block(
-                        i,
-                        block,
-                        hidden_states,
-                        encoder_hidden_states,
-                        *args,
-                        **kwargs,
-                    )
-                )
-        else:
-            # Compute all Bn blocks if no specific Bn compute blocks ids are set.
-            for block in self._Bn_blocks():
-                hidden_states = block(
-                    hidden_states,
-                    encoder_hidden_states,
-                    *args,
-                    **kwargs,
-                )
-                if not isinstance(hidden_states, torch.Tensor):
-                    hidden_states, encoder_hidden_states = hidden_states
-                    if not self.forward_pattern.Return_H_First:
-                        hidden_states, encoder_hidden_states = (
-                            encoder_hidden_states,
-                            hidden_states,
-                        )

         return hidden_states, encoder_hidden_states
cache_dit/cache_factory/cache_contexts/cache_context.py
CHANGED
@@ -14,13 +14,9 @@ logger = init_logger(__name__)
 @dataclasses.dataclass
 class CachedContext:  # Internal CachedContext Impl class
     name: str = "default"
-    # Dual Block Cache
-    # Fn=1, Bn=0, means FB Cache, otherwise, Dual Block Cache
+    # Dual Block Cache with flexible FnBn configuration.
     Fn_compute_blocks: int = 1
     Bn_compute_blocks: int = 0
-    # We have added residual cache pattern for selected compute blocks
-    Fn_compute_blocks_ids: List[int] = dataclasses.field(default_factory=list)
-    Bn_compute_blocks_ids: List[int] = dataclasses.field(default_factory=list)
     # non compute blocks diff threshold, we don't skip the non
     # compute blocks if the diff >= threshold
     non_compute_blocks_diff_threshold: float = 0.08
@@ -31,13 +27,6 @@ class CachedContext:  # Internal CachedContext Impl class
     l1_hidden_states_diff_threshold: float = None
     important_condition_threshold: float = 0.0

-    # Alter Cache Settings
-    # Pattern: 0 F 1 T 2 F 3 T 4 F 5 T ...
-    enable_alter_cache: bool = False
-    is_alter_cache: bool = True
-    # 1.0 means we always cache the residuals if alter_cache is enabled.
-    alter_residual_diff_threshold: Optional[Union[torch.Tensor, float]] = 1.0
-
     # Buffer for storing the residuals and other tensors
     buffers: Dict[str, Any] = dataclasses.field(default_factory=dict)
     incremental_name_counters: DefaultDict[str, int] = dataclasses.field(
@@ -63,7 +52,6 @@ class CachedContext:  # Internal CachedContext Impl class
     # Url: https://arxiv.org/pdf/2503.06923
     enable_taylorseer: bool = False
     enable_encoder_taylorseer: bool = False
-    # NOTE: use residual cache for taylorseer may incur precision loss
     taylorseer_cache_type: str = "hidden_states"  # residual or hidden_states
     taylorseer_order: int = 2  # The order for TaylorSeer
     taylorseer_kwargs: Dict[str, Any] = dataclasses.field(default_factory=dict)
@@ -97,16 +85,11 @@ class CachedContext:  # Internal CachedContext Impl class
     )
     cfg_continuous_cached_steps: int = 0

-    @torch.compiler.disable
     def __post_init__(self):
         if logger.isEnabledFor(logging.DEBUG):
             logger.info(f"Created _CacheContext: {self.name}")
         # Some checks for settings
         if self.enable_spearate_cfg:
-            assert self.enable_alter_cache is False, (
-                "enable_alter_cache must set as False if "
-                "enable_spearate_cfg is enabled."
-            )
             if self.cfg_diff_compute_separate:
                 assert self.cfg_compute_first is False, (
                     "cfg_compute_first must set as False if "
@@ -135,47 +118,32 @@ class CachedContext:  # Internal CachedContext Impl class
                 **self.taylorseer_kwargs
             )

-    @torch.compiler.disable
     def get_residual_diff_threshold(self):
-
-
-
-        residual_diff_threshold = self.
-        if self.l1_hidden_states_diff_threshold is not None:
-            # Use the L1 hidden states diff threshold if set
-            residual_diff_threshold = self.l1_hidden_states_diff_threshold
+        residual_diff_threshold = self.residual_diff_threshold
+        if self.l1_hidden_states_diff_threshold is not None:
+            # Use the L1 hidden states diff threshold if set
+            residual_diff_threshold = self.l1_hidden_states_diff_threshold
         if isinstance(residual_diff_threshold, torch.Tensor):
             residual_diff_threshold = residual_diff_threshold.item()
         return residual_diff_threshold

-    @torch.compiler.disable
     def get_buffer(self, name):
-        if self.enable_alter_cache and self.is_alter_cache:
-            name = f"{name}_alter"
         return self.buffers.get(name)

-    @torch.compiler.disable
     def set_buffer(self, name, buffer):
-        if self.enable_alter_cache and self.is_alter_cache:
-            name = f"{name}_alter"
         self.buffers[name] = buffer

-    @torch.compiler.disable
     def remove_buffer(self, name):
-        if self.enable_alter_cache and self.is_alter_cache:
-            name = f"{name}_alter"
         if name in self.buffers:
             del self.buffers[name]

-    @torch.compiler.disable
     def clear_buffers(self):
         self.buffers.clear()

-    @torch.compiler.disable
     def mark_step_begin(self):
         # Always increase transformer executed steps
-        # incr
-        # current
+        # incr step: prev 0 -> 1; prev 1 -> 2
+        # current step: incr step - 1
         self.transformer_executed_steps += 1
         if not self.enable_spearate_cfg:
             self.executed_steps += 1
@@ -190,10 +158,6 @@ class CachedContext:  # Internal CachedContext Impl class
                 # transformer step: 0,2,4,...
                 self.executed_steps += 1

-        if not self.enable_alter_cache:
-            # 0 F 1 T 2 F 3 T 4 F 5 T ...
-            self.is_alter_cache = not self.is_alter_cache
-
         # Reset the cached steps and residual diffs at the beginning
         # of each inference.
         if self.get_current_transformer_step() == 0:
@@ -248,7 +212,6 @@ class CachedContext:  # Internal CachedContext Impl class
     def get_cfg_taylorseers(self) -> Tuple[TaylorSeer, TaylorSeer]:
         return self.cfg_taylorseer, self.cfg_encoder_taylorseer

-    @torch.compiler.disable
     def add_residual_diff(self, diff):
         # step: executed_steps - 1, not transformer_steps - 1
         step = str(self.get_current_step())
@@ -260,15 +223,12 @@ class CachedContext:  # Internal CachedContext Impl class
             if step not in self.cfg_residual_diffs:
                 self.cfg_residual_diffs[step] = diff

-    @torch.compiler.disable
     def get_residual_diffs(self):
         return self.residual_diffs.copy()

-    @torch.compiler.disable
     def get_cfg_residual_diffs(self):
         return self.cfg_residual_diffs.copy()

-    @torch.compiler.disable
     def add_cached_step(self):
         curr_cached_step = self.get_current_step()
         if not self.is_separate_cfg_step():
@@ -296,23 +256,18 @@ class CachedContext:  # Internal CachedContext Impl class

             self.cfg_cached_steps.append(curr_cached_step)

-    @torch.compiler.disable
     def get_cached_steps(self):
         return self.cached_steps.copy()

-    @torch.compiler.disable
     def get_cfg_cached_steps(self):
         return self.cfg_cached_steps.copy()

-    @torch.compiler.disable
     def get_current_step(self):
         return self.executed_steps - 1

-    @torch.compiler.disable
     def get_current_transformer_step(self):
         return self.transformer_executed_steps - 1

-    @torch.compiler.disable
     def is_separate_cfg_step(self):
         if not self.enable_spearate_cfg:
             return False
@@ -322,6 +277,5 @@ class CachedContext:  # Internal CachedContext Impl class
         # CFG steps: 1, 3, 5, 7, ...
         return self.get_current_transformer_step() % 2 != 0

-    @torch.compiler.disable
     def is_in_warmup(self):
         return self.get_current_step() < self.max_warmup_steps
cache_dit/cache_factory/cache_contexts/cache_manager.py
CHANGED
@@ -122,10 +122,7 @@ class CachedContextManager:
                 default_value,
             )

-        # Manually set sequence fields
-        # and Bn_compute_blocks_ids, which are lists or sets.
-        _safe_set_sequence_field("Fn_compute_blocks_ids", [])
-        _safe_set_sequence_field("Bn_compute_blocks_ids", [])
+        # Manually set sequence fields
        _safe_set_sequence_field("taylorseer_kwargs", {})

         for attr in cache_attrs:
@@ -301,18 +298,6 @@ class CachedContextManager:
             return self.is_taylorseer_cache_residual()
         return True

-    @torch.compiler.disable
-    def is_alter_cache_enabled(self) -> bool:
-        cached_context = self.get_context()
-        assert cached_context is not None, "cached_context must be set before"
-        return cached_context.enable_alter_cache
-
-    @torch.compiler.disable
-    def is_alter_cache(self) -> bool:
-        cached_context = self.get_context()
-        assert cached_context is not None, "cached_context must be set before"
-        return cached_context.is_alter_cache
-
     @torch.compiler.disable
     def is_in_warmup(self) -> bool:
         cached_context = self.get_context()
@@ -359,20 +344,6 @@ class CachedContextManager:
         )
         return cached_context.Fn_compute_blocks

-    @torch.compiler.disable
-    def Fn_compute_blocks_ids(self) -> List[int]:
-        cached_context = self.get_context()
-        assert cached_context is not None, "cached_context must be set before"
-        assert (
-            len(cached_context.Fn_compute_blocks_ids)
-            <= cached_context.Fn_compute_blocks
-        ), (
-            "The num of Fn_compute_blocks_ids must be <= Fn_compute_blocks "
-            f"{cached_context.Fn_compute_blocks}, but got "
-            f"{len(cached_context.Fn_compute_blocks_ids)}"
-        )
-        return cached_context.Fn_compute_blocks_ids
-
     @torch.compiler.disable
     def Bn_compute_blocks(self) -> int:
         cached_context = self.get_context()
@@ -392,20 +363,6 @@ class CachedContextManager:
         )
         return cached_context.Bn_compute_blocks

-    @torch.compiler.disable
-    def Bn_compute_blocks_ids(self) -> List[int]:
-        cached_context = self.get_context()
-        assert cached_context is not None, "cached_context must be set before"
-        assert (
-            len(cached_context.Bn_compute_blocks_ids)
-            <= cached_context.Bn_compute_blocks
-        ), (
-            "The num of Bn_compute_blocks_ids must be <= Bn_compute_blocks "
-            f"{cached_context.Bn_compute_blocks}, but got "
-            f"{len(cached_context.Bn_compute_blocks_ids)}"
-        )
-        return cached_context.Bn_compute_blocks_ids
-
     @torch.compiler.disable
     def enable_spearate_cfg(self) -> bool:
         cached_context = self.get_context()
@@ -525,6 +482,9 @@ class CachedContextManager:
     # Fn buffers
     @torch.compiler.disable
     def set_Fn_buffer(self, buffer: torch.Tensor, prefix: str = "Fn"):
+        # DON'T set None Buffer
+        if buffer is None:
+            return
         # Set hidden_states or residual for Fn blocks.
         # This buffer is only use for L1 diff calculation.
         downsample_factor = self.get_downsample_factor()
@@ -548,6 +508,9 @@ class CachedContextManager:

     @torch.compiler.disable
     def set_Fn_encoder_buffer(self, buffer: torch.Tensor, prefix: str = "Fn"):
+        # DON'T set None Buffer
+        if buffer is None:
+            return
         if self.is_separate_cfg_step():
             self._debugging_set_buffer(f"{prefix}_encoder_buffer_cfg")
             self.set_buffer(f"{prefix}_encoder_buffer_cfg", buffer)
@@ -566,6 +529,9 @@ class CachedContextManager:
     # Bn buffers
     @torch.compiler.disable
     def set_Bn_buffer(self, buffer: torch.Tensor, prefix: str = "Bn"):
+        # DON'T set None Buffer
+        if buffer is None:
+            return
         # Set hidden_states or residual for Bn blocks.
         # This buffer is use for hidden states approximation.
         if self.is_taylorseer_enabled():
@@ -820,26 +786,12 @@ class CachedContextManager:
         else:
             prev_states_tensor = self.get_Fn_buffer(prefix)

-
-
-
-
-
-
-
-
-            )
-        else:
-            # Only cache in the alter cache steps
-            can_cache = (
-                prev_states_tensor is not None
-                and self.similarity(
-                    prev_states_tensor,
-                    states_tensor,
-                    threshold=threshold,
-                    parallelized=parallelized,
-                    prefix=prefix,
-                )
-                and self.is_alter_cache()
-            )
+        # Dynamic cache according to the residual diff
+        can_cache = prev_states_tensor is not None and self.similarity(
+            prev_states_tensor,
+            states_tensor,
+            threshold=threshold,
+            parallelized=parallelized,
+            prefix=prefix,
+        )
         return can_cache
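The rewritten `can_cache` path above always applies the dynamic residual-diff check; the alter-cache branch is gone. The actual `similarity()` implementation is not part of this diff, so the sketch below only illustrates the kind of relative-difference test such a check typically performs, with `threshold` playing the role of `residual_diff_threshold`; the helper name is hypothetical:

```python
import torch

def states_are_similar(prev: torch.Tensor, curr: torch.Tensor, threshold: float) -> bool:
    # Illustrative relative-L1 test: a small mean change between two steps
    # means the residual cached at the previous step can be reused.
    diff = (curr - prev).abs().mean()
    norm = prev.abs().mean() + 1e-8  # avoid division by zero
    return (diff / norm).item() < threshold

# e.g. with threshold=0.08, a step whose hidden states moved by less than
# roughly 8% in mean relative L1 would be served from the cache.
```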
cache_dit/cache_factory/cache_contexts/taylorseer.py
CHANGED
@@ -1,5 +1,4 @@
 import math
-import torch


 class TaylorSeer:
@@ -17,7 +16,6 @@ class TaylorSeer:
         self.compute_step_map = compute_step_map
         self.reset_cache()

-    @torch.compiler.disable
     def reset_cache(self):
         self.state = {
             "dY_prev": [None] * self.ORDER,
@@ -26,7 +24,6 @@ class TaylorSeer:
         self.current_step = -1
         self.last_non_approximated_step = -1

-    @torch.compiler.disable
     def should_compute_full(self, step=None):
         step = self.current_step if step is None else step
         if self.compute_step_map is not None:
@@ -39,7 +36,6 @@ class TaylorSeer:
                 return True
         return False

-    @torch.compiler.disable
     def approximate_derivative(self, Y):
         # n-th order Taylor expansion:
         # Y(t) = Y(0) + dY(0)/dt * t + d^2Y(0)/dt^2 * t^2 / 2!
@@ -58,7 +54,6 @@ class TaylorSeer:
                 break
         return dY_current

-    @torch.compiler.disable
     def approximate_value(self):
         # TODO: Custom Triton/CUDA kernel for better performance,
         # especially for large n_derivatives.
@@ -71,11 +66,9 @@ class TaylorSeer:
                 break
         return output

-    @torch.compiler.disable
     def mark_step_begin(self):
         self.current_step += 1

-    @torch.compiler.disable
     def update(self, Y):
         # Directly call this method will ingnore the warmup
         # policy and force full computation.
@@ -94,7 +87,6 @@ class TaylorSeer:
         self.state["dY_current"] = self.approximate_derivative(Y)
         self.last_non_approximated_step = self.current_step

-    @torch.compiler.disable
     def step(self, Y):
         self.mark_step_begin()
         if self.should_compute_full():
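The `approximate_derivative` / `approximate_value` pair above implements the truncated Taylor expansion referenced in the code comment, Y(t) ≈ Y(0) + Y'(0)·t + Y''(0)·t²/2!. A minimal numeric sketch of that idea, using a first-order finite-difference estimate from the last two full computations; the class's exact state layout (`dY_prev`, `dY_current`, higher orders) is not reproduced here and the helper name is hypothetical:

```python
def taylor_extrapolate(y_prev, y_curr, dt_prev: float, t_ahead: float):
    # First derivative estimated from the last two fully computed outputs.
    dy = (y_curr - y_prev) / dt_prev
    # Higher-order terms (t_ahead**k / k!) are omitted for brevity; TaylorSeer
    # keeps a list of finite differences up to its configured ORDER.
    return y_curr + dy * t_ahead

# On a cached step the extrapolated value stands in for running the blocks,
# and a full compute later refreshes y_prev / y_curr.
```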
cache_dit/cache_factory/cache_interface.py
CHANGED
@@ -24,7 +24,7 @@ def enable_cache(
     max_continuous_cached_steps: int = -1,
     residual_diff_threshold: float = 0.08,
     # Cache CFG or not
-    enable_spearate_cfg: bool =
+    enable_spearate_cfg: bool | None = None,
     cfg_compute_first: bool = False,
     cfg_diff_compute_separate: bool = True,
     # Hybird TaylorSeer
@@ -70,7 +70,7 @@ def enable_cache(
         residual_diff_threshold (`float`, *required*, defaults to 0.08):
             he value of residual diff threshold, a higher value leads to faster performance at the
             cost of lower precision.
-        enable_spearate_cfg (`bool`, *required*, defaults to
+        enable_spearate_cfg (`bool`, *required*, defaults to None):
             Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
             and non-CFG into single forward step, should set enable_spearate_cfg as False, for example:
             CogVideoX, HunyuanVideo, Mochi, etc.
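A usage sketch of the updated signature: with 0.2.33 the default `enable_spearate_cfg=None` lets cache-dit pick the CFG mode per pipeline, so most callers no longer pass it. The checkpoint id below is only illustrative; any supported DiT pipeline is configured the same way:

```python
import cache_dit
from diffusers import DiffusionPipeline

# Illustrative checkpoint id; substitute any supported DiT pipeline.
pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")

cache_dit.enable_cache(
    pipe,
    Fn_compute_blocks=8,           # DBCache F8
    Bn_compute_blocks=0,           # DBCache B0
    residual_diff_threshold=0.12,  # higher value: faster, lower precision
    # enable_spearate_cfg is left at None: resolved from the adapter registry
    # (True for Wan 2.1 / Qwen-Image style pipelines, False for fused-CFG
    # models such as CogVideoX, HunyuanVideo, Mochi).
)
```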
{cache_dit-0.2.31.dist-info → cache_dit-0.2.33.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.31
+Version: 0.2.33
 Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc
@@ -59,36 +59,59 @@ Dynamic: requires-python
 🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
 </p>
 <p align="center">
-
-
-
-
-
+🎉Now, <b>cache-dit</b> covers <b>most</b> mainstream Diffusers' <b>DiT</b> Pipelines🎉<br>
+🔥<a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Qwen-Image-Lightning</a> | <a href="#supported"> Wan 2.1/2.2 </a>🔥<br>
+🔥<a href="#supported">HunyuanImage-2.1</a> | <a href="#supported">HunyuanVideo</a> | <a href="#supported">HunyuanDiT</a> | <a href="#supported">HiDream</a> | <a href="#supported">Mochi</a>🔥<br>
+🔥<a href="#supported">CogView3Plus</a> | <a href="#supported">CogView4</a> | <a href="#supported">Chroma</a> | <a href="#supported"> LTXVideo </a> | <a href="#supported">CogVideoX 1/1.5</a>🔥<br>
+🔥<a href="#supported">Cosmos</a> | <a href="#supported">SkyReelsV2</a> | <a href="#supported">VisualCloze</a> | <a href="#supported"> OmniGen </a> | <a href="#supported">Lumina 1/2</a>🔥<br>
+🔥<a href="#supported">Allegro</a> | <a href="#supported">EasyAnimate</a> | <a href="#supported">SD 3/3.5</a> | <a href="#supported"> ... </a> | <a href="#supported">PixArt</a>🔥
 </p>
 </div>
 <div align='center'>
-<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C0_Q0_NONE.gif width=
-<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=
-<img src
-<
+<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C0_Q0_NONE.gif width=124px>
+<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=124px>
+<img src=./assets/gifs/hunyuan_video.C0_L0_Q0_NONE.gif width=126px>
+<img src=./assets/gifs/hunyuan_video.C0_L0_Q0_DBCACHE_F1B0_W8M0MC2_T0O2_R0.12_S27.gif width=126px>
+<p><b>🔥Wan2.2 MoE</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:2.0x↑🎉 | <b>HunyuanVideo</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:2.1x↑🎉</p>
 <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image.C0_Q0_NONE.png width=160px>
 <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image.C1_Q0_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S23.png width=160px>
-<img src
-<
-</p>
+<img src=./assets/flux.C0_Q0_NONE_T23.69s.png width=90px>
+<img src=./assets/flux.C0_Q0_DBCACHE_F1B0_W4M0MC0_T1O2_R0.15_S16_T11.39s.png width=90px>
+<p><b>🔥Qwen-Image</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.8x↑🎉 | <b>FLUX.1-dev</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:2.1x↑🎉</p>
+<img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image-lightning.4steps.C0_L1_Q0_NONE.png width=160px>
+<img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image-lightning.4steps.C0_L1_Q0_DBCACHE_F16B16_W2M1MC1_T0O2_R0.9_S1.png width=160px>
+<img src=./assets/sd_3_5.C0_L0_Q0_NONE.png width=90px>
+<img src=./assets/sd_3_5.C0_L0_Q0_DBCACHE_F1B0_W8M0MC3_T0O2_R0.12_S30.png width=90px>
+<p><b>🔥Qwen-Image-Lightning</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.14x↑🎉 | <b>SD 3.5</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:2.5x↑🎉</p>
+<img src=./assets/hidream.C0_L0_Q0_NONE.png width=100px>
+<img src=./assets/hidream.C0_L0_Q0_DBCACHE_F1B0_W8M0MC0_T0O2_R0.08_S24.png width=100px>
+<img src=./assets/cogview4.C0_L0_Q0_NONE.png width=100px>
+<img src=./assets/cogview4.C0_L0_Q0_DBCACHE_F8B0_W8M0MC0_T0O2_R0.08_S15.png width=100px>
+<img src=./assets/cogview4.C0_L0_Q0_DBCACHE_F1B0_W4M0MC4_T0O2_R0.2_S22.png width=100px>
+<p><b>🔥HiDream-I1</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.9x↑🎉 | <b>CogView4</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.4x↑🎉 | 1.7x↑🎉</p>
+<img src=./assets/gifs/mochi.C0_L0_Q0_NONE.gif width=160px>
+<img src=./assets/gifs/mochi.C0_L0_Q0_DBCACHE_F8B0_W8M0MC0_T0O2_R0.08_S34.gif width=160px>
+<img src=./assets/hunyuan-image-2.1.C0_L0_Q1_fp8_w8a16_wo_NONE.png width=91px>
+<img src=./assets/hunyuan-image-2.1.C0_L0_Q1_fp8_w8a16_wo_DBCACHE_F8B0_W8M0MC2_T1O2_R0.12_S25.png width=91px>
+<p><b>🔥Mochi-1</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.8x↑🎉 | <b>HunyuanImage-2.1</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.7x↑🎉
+<br>♥️ Please consider to leave a <b>⭐️ Star</b> to support us ~ ♥️</p>
 </div>

 ## 🔥News

+- [2025-09-10] 🎉Day 1 support [**HunyuanImage-2.1**](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with **1.7x↑🎉** speedup! Check this [example](./examples/pipeline/run_hunyuan_image_2.1.py).
+- [2025-09-08] 🔥[**Qwen-Image-Lightning**](./examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
 - [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
 - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
-- [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
 - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.
-- [2025-07-13] 🎉[**FLUX.1-
+- [2025-07-13] 🎉[**FLUX.1-dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + **compile + FP8 DQ**.

 <details>
 <summary> Previous News </summary>

+- [2025-09-08] 🎉First caching mechanism in [Qwen-Image-Lightning](https://github.com/ModelTC/Qwen-Image-Lightning) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/ModelTC/Qwen-Image-Lightning/pull/35).
+- [2025-09-08] 🎉First caching mechanism in [Wan2.2](https://github.com/Wan-Video/Wan2.2) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/Wan-Video/Wan2.2/pull/127) for more details.
+- [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
 - [2025-09-01] 📚[**Hybird Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/run_flux_adapter.py) as an example.
 - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/pipeline/run_flux_kontext.py) as an example.
 - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
@@ -134,6 +157,8 @@ pip3 install git+https://github.com/vipshop/cache-dit.git

 Currently, **cache-dit** library supports almost **Any** Diffusion Transformers (with **Transformer Blocks** that match the specific Input and Output **patterns**). Please check [🎉Unified Cache APIs](#unified) for more details. Here are just some of the tested models listed:

+- [🚀HunyuanImage-2.1](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀Qwen-Image-Lightning](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀Qwen-Image-Edit](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀Qwen-Image](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
@@ -141,13 +166,18 @@ Currently, **cache-dit** library supports almost **Any** Diffusion Transformers
 - [🚀FLUX.1-Kontext-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀CogView3-Plus](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀CogView4](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀Wan2.2-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀HunyuanDiT](https://github.com/vipshop/cache-dit/raw/main/examples)
-- [🚀HiDream](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀HiDream-I1-Full](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀PixArt-Alpha](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀PixArt-Sigma](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀SD-3/3.5](https://github.com/vipshop/cache-dit/raw/main/examples)

 </details>

@@ -285,23 +315,7 @@ cache_dit.enable_cache(
     Bn_compute_blocks=8, # Bn, B8, etc.
     residual_diff_threshold=0.12,
 )
-```
-Moreover, users configuring higher **Bn** values (e.g., **F8B16**) while aiming to maintain good performance can specify **Bn_compute_blocks_ids** to work with Bn. DBCache will only compute the specified blocks, with the remaining estimated using the previous step's residual cache.
-
-```python
-# Custom options, F8B16, higher precision with good performance.
-cache_dit.enable_cache(
-    pipe,
-    Fn_compute_blocks=8, # Fn, F8, etc.
-    Bn_compute_blocks=16, # Bn, B16, etc.
-    # 0, 2, 4, ..., 14, 15, etc. [0,16)
-    Bn_compute_blocks_ids=cache_dit.block_range(0, 16, 2),
-    # If the L1 difference is below this threshold, skip Bn blocks
-    # not in `Bn_compute_blocks_ids`(1, 3,..., etc), Otherwise,
-    # compute these blocks.
-    non_compute_blocks_diff_threshold=0.08,
-)
-```
+```

 <div align="center">
 <p align="center">
{cache_dit-0.2.31.dist-info → cache_dit-0.2.33.dist-info}/RECORD
CHANGED
@@ -1,26 +1,26 @@
 cache_dit/__init__.py,sha256=kX9V-FegZG4c8LMwI4PTmMqH794MEW0pzDArdhC0cJw,1241
-cache_dit/_version.py,sha256=
+cache_dit/_version.py,sha256=gTEHTWtuqv38KTvjBsXd5hC019b6d7AyfC8gLMY7KAo,706
 cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
 cache_dit/utils.py,sha256=WK7eqgH6gCYNHXNLmWyxBDU0XSHTPg7CfOcyXlGXBqE,10510
 cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
 cache_dit/cache_factory/__init__.py,sha256=Iw6-iJLFbdzCsIDZXXOw371L-HPmoeZO_P9a3sDjP5s,1103
-cache_dit/cache_factory/cache_adapters.py,sha256=
-cache_dit/cache_factory/cache_interface.py,sha256=
+cache_dit/cache_factory/cache_adapters.py,sha256=dmNX68nBD52HtQvHnNAuSn1zjDWrQdycD0qXy-w-mwc,18212
+cache_dit/cache_factory/cache_interface.py,sha256=LpyCy-tQ_GcTRAYLpMMf9hFVIktABHI6CObn5Ll8bMw,8548
 cache_dit/cache_factory/cache_types.py,sha256=ooukxQRG55uTLmaZ0SKw6gIeY6SQHhMxkbv55uj2Sqk,991
 cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCftdZjLygqhsmdR4,2258
 cache_dit/cache_factory/utils.py,sha256=XkVM9AXcB9zYq8-S8QKAsGz80r3tA6U3lBNGDGeHOe4,1871
-cache_dit/cache_factory/block_adapters/__init__.py,sha256=
-cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=
+cache_dit/cache_factory/block_adapters/__init__.py,sha256=OZM5vJwmQIkoIwVmMxKXiHqKvs31NyAva1Z91C_ko3w,17547
+cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=IqHV10aK2qA8kEVDi7EEoUSBt0GzwCUM4GpLNf8Jgww,21656
 cache_dit/cache_factory/block_adapters/block_registers.py,sha256=ZeN2wGPmuf2u3puSsBx8x-rl3wRo8-cWcuWNcrssVfA,2553
 cache_dit/cache_factory/cache_blocks/__init__.py,sha256=08Ox7kD05lkRKCOsVTdEZeKAWBheqpxfrAT1Nz7eclI,2916
 cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=ElMps6_7uI74tSF9GDR_dEI0bZEhdzcepM29xFWnYo8,428
-cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=
-cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=
+cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=Bv56qETXhsREvCrNvnZpSqDIIHsi6Ze3FJW4Yk2x3uI,8597
+cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=d4H9kEB0AgnVMT8aF0Y54SUMUQUxw5HQ8gRkoCuTQ_A,14577
 cache_dit/cache_factory/cache_blocks/utils.py,sha256=dGOC1tMMOvcbvEgx44eTESKn_jsv-0RZ3tRHPa3wmQ4,1315
 cache_dit/cache_factory/cache_contexts/__init__.py,sha256=rqnJ5__zqnpVHK5A1OqWILpNh5Ss-0ZDTGgtxZMKGGo,250
-cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=
-cache_dit/cache_factory/cache_contexts/cache_manager.py,sha256=
-cache_dit/cache_factory/cache_contexts/taylorseer.py,sha256=
+cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=zqixcxV_LjnyoYDZ6q3HAC-hqYyVV6g0MWKBI2hA1nQ,11855
+cache_dit/cache_factory/cache_contexts/cache_manager.py,sha256=Mcj1upIpXT_CwO4AdY4ZNJSWoOXn3Lx2mBZRi_QuLbU,32710
+cache_dit/cache_factory/cache_contexts/taylorseer.py,sha256=hgLmgIkQgwbFTjxqtLUCJ3mgDGEcJK09B7RK8sBdPiI,3593
 cache_dit/cache_factory/patch_functors/__init__.py,sha256=06zdddrjvSCgBzJ0a8niRHd3ucF2qsbzlbL00d4aCvk,451
 cache_dit/cache_factory/patch_functors/functor_base.py,sha256=Ahk0fTfrHgNdEl-9JSkACvfyyv9G-Ei5OSz7XBIlX5o,357
 cache_dit/cache_factory/patch_functors/functor_chroma.py,sha256=2iLxlsc-1dDHRveqCXaC07E9CeMNOuBNkvpJ1atpK7E,10048
@@ -38,12 +38,11 @@ cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR
 cache_dit/metrics/lpips.py,sha256=I2qCNi6qJh5TRsaIsdxO0WoRX1DN7U_H3zS0oCSahYM,1032
 cache_dit/metrics/metrics.py,sha256=8jvM1sF-nDxUuwCRy44QEoo4dYVLCQVh1QyAMs4eaQY,27840
 cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70A,59
-cache_dit/quantize/quantize_ao.py,sha256=
+cache_dit/quantize/quantize_ao.py,sha256=Fx1KW4l3gdEkdrcAYtPoDW7WKBJWrs3glOHiEwW_TgE,6160
 cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
-cache_dit/
-cache_dit-0.2.
-cache_dit-0.2.
-cache_dit-0.2.
-cache_dit-0.2.
-cache_dit-0.2.
-cache_dit-0.2.31.dist-info/RECORD,,
+cache_dit-0.2.33.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+cache_dit-0.2.33.dist-info/METADATA,sha256=GQBvDzKLXL3tABguCRqLNc-Z39h0AcMK_J37demDTu8,25977
+cache_dit-0.2.33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cache_dit-0.2.33.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+cache_dit-0.2.33.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+cache_dit-0.2.33.dist-info/RECORD,,